0 Global Setup

0.1 Path

before you start: .rs.restartR()

GitClonePath <- "/Users/admin/Desktop/ElbeEstuarineZander"
setwd(GitClonePath)

0.2 Packages

# R package management tools such as packrat or renv. These tools allow you to create isolated environments with specific versions of R and installed packages, ensuring reproducibility and avoiding conflicts with other packages installed in your system.
#install.packages("packrat")
#packrat::init()
#packrat::restore()

Most of the packages needed here are stored in the packrat::restore

0.3 Input Files

Date <- "16.01.2024"
Species <- "SL"
Year       <- "2021"
Season     <- "Summer"
Analysis <- "WGCNA"

################
#Bacteria Input#
pslist <- readRDS(file.path(path_Output_16S,paste(paste(  "SL_2021_Summer_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))
pslistraw <- readRDS(file.path(path_Output_16S,paste(paste("SL_2021_Summer_ps_16S_merge_pslistraw", Date, sep="_"), ".rds", sep="")))

##################################################
#AnnotationFile Created in Sections 1.1.3 - 1.1.5#
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]

#############
#Sample File#
SAMDF<- read.table(file=file.path(path_Input_RNA, "SL_samplefile_04.01.2024.csv") ,sep=";",dec = ".")

#####################
#Count and TPM files#
cnt_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_countsNoRibo_08.01.24.rds"))
cnt_RNA_Liver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_countsNoRibo_08.01.24.rds"))
tpm_RNA_Liver  <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpm_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
################################################
#Assign the SL_RNA Output to Global Environment#
save_name_RNA_Deseq2 <- "SL_2021_Summer" #from SL_RNA_16.01.24_Paper.Rmd

DeseqLFC005 <- readRDS(file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "0.05", sep=""), Date, sep="_"), ".rds", sep=""))))
DeseqLFC1 <- readRDS(file = paste0(file.path(path_Output_RNA,  paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "1", sep=""), Date, sep="_"), ".rds", sep=""))))

0.4 Functions

0.5 Setup

traitData<- c(
    "FultonK", "Length","Weight", 
    "StomachContent", 
    "HSI",  "SSI",         
    "O2","Salinity","SecchiDepth")


#Sample Files
library(readxl)
library(tidyverse)
library(ggplot2)

SAMDF_16S<-dplyr::filter(SAMDF, DNA16S == "yes") 
rownames(SAMDF_16S) <- SAMDF_16S$SampleID

# Get all IDs present
SAMDF_RNA_Gill<-dplyr::filter(SAMDF, RNAseq == "yes") 
rownames(SAMDF_RNA_Gill) <- SAMDF_RNA_Gill$SampleID

SAMDF_RNA_Liver<-dplyr::filter(SAMDF, RNAseq == "yes") 
rownames(SAMDF_RNA_Liver) <- SAMDF_RNA_Liver$SampleID

SAMDF_RNA_Consensus<-dplyr::filter(SAMDF, RNAseq == "yes") 
rownames(SAMDF_RNA_Consensus) <- SAMDF_RNA_Consensus$SampleID

OutlineColor <- "grey20" #"white"

´ ## 0.6 Tutorials

#-

1 Abiotics

##- Figure 2C

Abiotics<-c("O2","Salinity", "SecchiDepth", "Temperature", "pH") #, "Salinity", )
AbioticsColors <- c("O2"  ="#3399FF",  "Salinity" = "#FFC333",  "SecchiDepth"= "#CC6633",  "Temperature" = "#FF0000",  "pH" ="#999333")
require(ggrepel)

  df <- na.omit(SAMDF[,c(Abiotics, "Loc", "Season")][SAMDF$Season == "Elbe_Summer_21",]%>% distinct(Loc, .keep_all = TRUE))
  df$Ekm <- c("Ekm-713", "Ekm-692", "Ekm-665", "Ekm-633")
  df$Locs <-c("MG", "BB", "SS", "ML")
  df_long <- df %>% gather(Abiotics, Value, O2:pH)
  df_long$fLoc <- as.numeric(factor(df_long$Loc, levels = unique(df_long$Loc)))

    AbioticsLinePlot <- ggplot(df_long, aes(x = fLoc, y = Value, color = Abiotics, label = round(Value, 2))) +
    geom_line() +
    geom_point(size=3) +
    geom_text_repel(aes(y = Value + 0.3), size = 4, show.legend = FALSE, fontface = "bold") +
    labs(x = "", y = "Abiotics", color = "Abiotics") +
    theme_minimal() + #atheme +
    #theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    scale_x_continuous(breaks = df_long$fLoc, labels = paste(df_long$Ekm))+  #df_long$Locs,
    scale_color_manual(values = AbioticsColors, 
                       breaks = names(AbioticsColors), 
                       labels = noquote(paste(names(AbioticsColors), " [",  noquote(Units[names(AbioticsColors)]), "]", 
                                 sep = ""))) + 
    
    theme(axis.title.x = element_blank(),
        axis.title.x.bottom = element_text(size=10,face = "bold"),
        axis.title.y.left = element_text(size=10,face = "bold"),
        strip.text.x = element_text(face = "bold"),
        strip.text.x.bottom = element_text(size = 10,face = "bold"),
        strip.text.y.left = element_text(size = 10,face = "bold"),
        axis.text.x.bottom = element_text(face = "bold", angle = 0, hjust = 0),
        axis.text.y.left = element_text(size = 10, face = "bold"),
        legend.title = element_blank(),
        legend.text = element_text(size = 10,face = "bold"), 
        axis.text.x = element_text(size=10, angle = 0),
        plot.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 9),
        plot.caption = element_text(size = 9))
    plot(AbioticsLinePlot)

    ggsave(AbioticsLinePlot, filename = paste(paste(Species, Year, Season, "Abiotics_Locs", sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 6.5,
    height = 3.5)

-

2 SSU

2.1 SSU Setup

set.seed(123)
Species    <- "SL"
Tissue     <- "Gill"
Year       <- "2021"
Season     <- "Summer"
Type       <- "16S"
Analysis   <- "WGCNA"
alpha      <- 0.05
OperatingSystem <- "Windows"
prefix <- "SSU-"

save_name_16S <- paste(Species, Year, Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA), save_name_16S, "_.RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNASL_2021_Summer_Gill_16S_WGCNA_.RData"
Samples_SSU_Gill <- c(
  "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5",  "SLSU21MGEB7",
  "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
  "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
  "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7",  "SLSU21MLEB9")

2.2 Pre-filtering & Transormation

Some comments on normalization: - Normalization: Strand et al., 2021 4.2.1. Data preprocessing Transcript expression counts were summed for each gene. Expression values were normalized across samples using the Trimmed mean of M values (TMM)-method [33], and log2- transformed. Genes with expression levels below 1.0 in all samples and/or with a standard deviation less than 0.15 were removed before network inference. This reduced the number of genes from 48,057 to 37,408. We retained OTUs that contribute at least 0.005% of the total microbial abundance. This reduced the number of OTUs from 1152 to 296 and the number of zeros in the abundance table by 50%. OTU abundances was normalized using the Cumulative Sum Scaling (CSS) method from the Bioconductor package metagenome- Seq, which by default log2-transforms the data.

Swift et al. 2021: Cumulative-Sum Score (CSS) uses robust statistics to provide an alternative to TSS that is less influenced by preferentially sampled taxa. Its normalization scaling factor is defined as the cumulative sum of the observed counts up to a threshold which is determined using a heuristic that minimizes the influence of the preferentially sampled taxa. The idea is that CSS scales each sample using only the part of the counts distribution that is relatively invariant (H. Lin &Peddada, 2020a; Weiss et al., 2017). However, the determination of the thresholds could fail due to high count variability (J. Chen et al., 2018). CSS or TSS do not account for sparsity or address compositionality.

The DESeq scaling factor of the observed abundances for each sample is computed as the median of all the ratios between counts of the sample and the reference. Assuming most taxa are not differentially abundant, the median of these ratios would provide an estimate of a correction factor for all the read counts. Like most normalizations, TMM and DESeq rely on the strong assumptions that most taxa are not differentially abundant, and of those differentially abundant, there is an approximate balanced amount of over-abundance and under-abundance (Calza & Pawitan, 2010; Dillies et al., 2012; Gloor et al., 2017; Weiss et al., 2017). These may be reasonable assumptions for gene expression data but may not be valid for microbiome data which tends to be diverse. Also, while TMM and DESEq normalization are designed for compositional data, they do not address sparsity. In fact, large fractions of zeros and relatively low sequencing depths can be problematic for both methods and can lead to serious biases (Kumar et al., 2018). As stated previously, adding pseudo-counts to the original count data will not rectify the problem.

Large scale tool benchmarking by Thorsen et al. has revealed that most of the commonly used differential (relative) abundance testing methods including edgeR and DESeq are sensitive to sparsity (Thorsen et al., 2016) and consequently exhibit unacceptably high false positive rates (Hawinkel et al., 2019). Also, recent investigations by H. Lin and Peddada (2020a, 2020b) indicate poor performance of these methods for microbiome data. However, based on simulations and analytical derivations, they attribute the poor performance to the violation of the assumption that most taxa do not change. As a result, the false discovery rate is inflated, and it increases as the sample size increases. Similar behavior of these methods was also seen by Weiss et al. (2017). Thus, H. Lin and Peddada (2020a) do not advocate the use of edgeR and DESeq for the analysis of microbiome data.

following http://mixomics.org/mixmc/mixmc-preprocessing/ Arumugam et al., (2011)

library(phyloseq)
library(tidyverse)

psraw <-pslistraw$ps_SL_21
ps <-pslist$ps_SL_21



low.count.removal <- function(
                        data, # OTU count df of size n (sample) x p (OTU)
                        percent=0.005 # cutoff chosen
                        ) {
    keep.otu = which(colSums(data)*100/(sum(colSums(data))) > percent)
    data.filter = data[,keep.otu]
    return(list(data.filter = data.filter, keep.otu = keep.otu))}

#######################################################
#Plot consequence of filtering from Strand et al, 2021#
#######################################################
frac_zero <- c()
all_sum   <- c()
num_otu   <- c()
seqp <- seq(0,0.1,0.0001)
for(i in seqp){
  result.filter = low.count.removal(otu_table(psraw), percent=i)
  length(result.filter$keep.otu)
  otu.f = result.filter$data.filter
  frac_zero <- c(frac_zero, sum(otu.f == 00)/ (ncol(otu.f)*nrow(otu.f)))
  all_sum <- c(all_sum, sum(otu.f))
  num_otu <- c(num_otu, ncol(otu.f))
}
names(frac_zero) <- seqp
names(all_sum) <- seqp

applied_filter <- 0.005
# Get all_sum and num_otu to a fraction of the total
mas <- max(all_sum)
mno <- max(num_otu)

all_sum %>% (function(x){x/max(x)}) -> all_sum
num_otu %>% (function(x){x/max(x)}) -> num_otu
data.frame(filter = seqp, 
           Percent.zeros   = frac_zero*100, 
           Total.abundance = all_sum*100, 
           Number.of.OTUs  = num_otu*100) %>% 
  tidyr::gather(key = "Type", value = "percent.of.total", -filter) %>%
  ggplot() + 
  geom_line(mapping = aes(x = filter, y = percent.of.total, col = Type)) +
  geom_vline(xintercept = applied_filter, alpha = 0.5, color = "red", linetype="dashed")+
  ylab("Percent of total") +
  xlab(paste("Filter at", applied_filter)) +
  theme(legend.title = element_blank()) -> FilterZerosPlot

FilterZerosPlot

#The Filtering has been performed in the SL_16S.rmd
# ps <-pslist$ps_SL_21
# result.filter <- low.count.removal(otu_table(ps), percent=applied_filter) 
# data.filter <- result.filter$data.filter
# length(result.filter$keep.otu) # check how many OTUs remain
#ps_SL_WGCNA <- prune_taxa(names(result.filter$keep.otu), ps)



####################
#CLR Transformation#
####################
ps_SL_WGCNA <- pslist$ps_SL_21
tail(t(otu_table(ps_SL_WGCNA)))
## OTU Table:          [6 taxa and 24 samples]
##                      taxa are rows
##                               SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV13564:Paracaedibacteraceae           0           0           0           0
## ASV14092:Roseibacillus                  0           0           0           0
## ASV14296:Acinetobacter                  0           0           0           0
## ASV14326:Streptococcus                  0           0           0           0
## ASV15061:Flavobacterium                 0           0           0           0
## ASV15299:Achromobacter                  0           0           0           0
##                               SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV13564:Paracaedibacteraceae           0           0           0           0
## ASV14092:Roseibacillus                  0           0           0           0
## ASV14296:Acinetobacter                  0           0           0           0
## ASV14326:Streptococcus                  0           0           0           0
## ASV15061:Flavobacterium                 0           0           0           0
## ASV15299:Achromobacter                  0           0           0           0
##                               SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV13564:Paracaedibacteraceae           0           0           0           0
## ASV14092:Roseibacillus                  0           0           0          35
## ASV14296:Acinetobacter                  0           0           0           0
## ASV14326:Streptococcus                  0           0          34           0
## ASV15061:Flavobacterium                 0           0           0           0
## ASV15299:Achromobacter                  0           0           0           0
##                               SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV13564:Paracaedibacteraceae           0           0           0           0
## ASV14092:Roseibacillus                  0           0           0           0
## ASV14296:Acinetobacter                  0           0           0          20
## ASV14326:Streptococcus                  0           0           0           0
## ASV15061:Flavobacterium                 0           0           1           0
## ASV15299:Achromobacter                  0           0           0          21
##                               SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV13564:Paracaedibacteraceae           0           0          31           0
## ASV14092:Roseibacillus                  0           0           0           0
## ASV14296:Acinetobacter                 12           0           0           0
## ASV14326:Streptococcus                  0           0           0           0
## ASV15061:Flavobacterium                 0           0          26           0
## ASV15299:Achromobacter                  8           0           0           0
##                               SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV13564:Paracaedibacteraceae           0           0           0           0
## ASV14092:Roseibacillus                  0           0           0           0
## ASV14296:Acinetobacter                  0           0           0           0
## ASV14326:Streptococcus                  0           0           0           0
## ASV15061:Flavobacterium                 0           0           4           0
## ASV15299:Achromobacter                  0           0           0           0
table(rowSums(t(otu_table(ps_SL_WGCNA))))
## 
##    28    29    30    31    32    33    34    35    36    37    38    39    40 
##    28    21    28    26    25    21    20    26    15    11    11     8    18 
##    41    42    43    44    45    46    47    48    49    50    51    52    53 
##    18    17    18    13    10    14    14    13     7     8    10     8     7 
##    54    55    56    57    58    59    60    61    62    63    64    65    66 
##     7     7    11    10     5     8     5     8    12     8    11     2     5 
##    67    68    69    70    72    73    74    75    76    77    78    79    80 
##     6     6     8    10     7     1     4    13     4    12     4     3     4 
##    81    82    83    84    85    86    87    88    89    90    91    92    93 
##     5     4     3    10     4     4     6     8     3     1     3     2     2 
##    94    96    97    98    99   100   101   102   103   104   105   106   107 
##     2     3     2     2     3     1     3     3     3     2     2     3     2 
##   108   109   111   112   113   114   116   117   118   119   120   121   122 
##     4     4     1     4     4     5     2     1     2     1     1     4     1 
##   123   124   125   126   127   128   129   130   132   133   134   135   136 
##     2     1     4     2     2     4     2     1     1     2     1     2     2 
##   137   138   139   140   141   142   143   144   145   146   147   148   149 
##     2     1     1     3     2     3     2     1     1     1     3     3     3 
##   150   151   152   153   154   155   156   157   159   160   162   163   164 
##     2     1     1     4     2     2     2     4     1     1     3     1     3 
##   166   167   168   169   170   171   172   174   175   178   180   181   182 
##     1     2     1     3     3     1     3     3     1     1     1     1     1 
##   184   186   187   188   189   191   193   194   195   196   197   198   201 
##     1     1     2     3     2     1     1     1     2     1     3     2     4 
##   202   204   206   208   210   212   213   215   216   217   218   219   223 
##     1     1     1     1     1     2     1     1     1     2     1     1     2 
##   227   228   230   232   233   236   238   239   240   242   243   252   253 
##     1     1     1     1     1     1     1     1     1     1     1     1     2 
##   255   259   262   264   265   267   270   274   277   279   282   283   284 
##     1     3     1     1     1     1     1     2     1     3     1     1     1 
##   285   286   287   290   291   296   297   299   309   310   312   313   320 
##     1     1     1     1     1     2     2     1     1     1     1     1     1 
##   324   325   327   328   329   332   333   334   336   340   343   344   346 
##     2     1     2     2     1     1     1     1     1     1     1     1     1 
##   352   354   355   356   359   361   362   363   365   369   370   375   377 
##     1     1     2     1     1     1     1     3     2     1     2     2     1 
##   386   395   397   398   400   410   419   420   421   434   440   443   450 
##     1     1     1     1     1     1     1     1     1     1     2     1     1 
##   451   459   463   467   469   473   480   488   493   497   499   515   516 
##     2     1     1     1     1     1     1     1     1     1     1     1     1 
##   519   527   533   543   557   566   573   593   600   604   607   613   614 
##     2     1     1     1     1     1     1     1     1     1     1     1     1 
##   615   621   624   636   640   642   647   667   668   669   671   677   685 
##     1     2     1     1     1     1     1     1     1     1     1     1     1 
##   688   689   690   692   712   719   720   721   730   740   767   778   800 
##     1     2     1     1     1     1     1     1     1     1     1     1     1 
##   804   820   826   848   863   866   868   871   884   891   897   923   925 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
##   938   940   953   984   991  1006  1056  1091  1098  1157  1182  1193  1246 
##     1     1     1     1     1     1     1     1     2     1     1     1     1 
##  1329  1331  1337  1343  1349  1361  1413  1417  1421  1433  1465  1522  1662 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
##  1697  1701  1771  1878  1946  2012  2237  2303  2328  2716  2806  3050  3240 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
##  3325  3372  3713  3780  3948  4194  4355  4478  4481  5154  5303  6348  6672 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
##  6741  7487  7812  8830  9113  9885  9901 10524 11192 11525 12945 13014 13659 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## 20884 27811 68650 
##     1     1     1
ps_SL_WGCNA_clr <- microbiome::transform(ps_SL_WGCNA, "clr")
head(t(otu_table(ps_SL_WGCNA_clr)))
## OTU Table:          [6 taxa and 24 samples]
##                      taxa are rows
##                         SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV1:Elizabethkingia       8.600036    7.970279    6.624233    7.659307
## ASV2:Citrobacter           7.237126    6.672406    4.516563    5.855956
## ASV3:Enterobacteriaceae    7.040568    6.609335    4.303730    5.687514
## ASV4:Enterobacteriaceae    6.840890    6.396027    4.232044    5.562401
## ASV5:Enterobacter          6.646325    6.371862    4.032998    5.400742
## ASV6:Enterobacteriaceae    6.470677    6.213147    3.767479    5.325949
##                         SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV1:Elizabethkingia       9.483500    8.631171    9.448500    9.027291
## ASV2:Citrobacter           7.893771    7.058225    7.952625    7.522832
## ASV3:Enterobacteriaceae    8.016703    7.075600    8.045857    7.535790
## ASV4:Enterobacteriaceae    7.842953    6.693711    7.872213    7.498632
## ASV5:Enterobacter          7.500675    6.535178    7.299677    7.172404
## ASV6:Enterobacteriaceae    7.440153    6.522424    7.426611    7.068435
##                         SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV1:Elizabethkingia       7.633066    6.782400    8.621982    9.191411
## ASV2:Citrobacter           5.561344    4.543761    6.760025    7.625637
## ASV3:Enterobacteriaceae    5.398641    4.141420    6.992007    7.346226
## ASV4:Enterobacteriaceae    5.242239    4.543761    6.548012    7.352757
## ASV5:Enterobacter          5.100682    3.354286    6.442969    6.913629
## ASV6:Enterobacteriaceae    4.815751    3.108828    6.309059    6.798151
##                         SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV1:Elizabethkingia       8.645028    9.061274    7.871273    7.338069
## ASV2:Citrobacter           7.381247    7.625210    6.442742    4.999188
## ASV3:Enterobacteriaceae    7.343888    7.516458    6.401930    5.162813
## ASV4:Enterobacteriaceae    7.226915    7.384228    6.405396    4.667424
## ASV5:Enterobacter          7.042626    7.219807    6.081399    4.599605
## ASV6:Enterobacteriaceae    6.807887    7.068901    6.062082    4.607371
##                         SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV1:Elizabethkingia       7.626074    7.908607    7.623295    8.384012
## ASV2:Citrobacter           6.048266    5.790626    6.236401    6.939127
## ASV3:Enterobacteriaceae    5.910103    5.515858    6.269074    6.923891
## ASV4:Enterobacteriaceae    5.782935    5.651755    6.212878    7.154263
## ASV5:Enterobacter          5.490392    5.259835    5.881279    6.548759
## ASV6:Enterobacteriaceae    5.285756    5.415629    5.682809    6.526166
##                         SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV1:Elizabethkingia       7.606868    8.223416    8.899901    9.267608
## ASV2:Citrobacter           6.202312    6.913825    6.932687    7.149940
## ASV3:Enterobacteriaceae    6.110288    6.631920    6.910952    7.255673
## ASV4:Enterobacteriaceae    5.866606    6.804931    7.003572    7.333534
## ASV5:Enterobacter          5.809837    6.325696    6.551248    6.927333
## ASV6:Enterobacteriaceae    5.433320    6.343234    6.551248    6.634010
tail(t(otu_table(ps_SL_WGCNA_clr)))
## OTU Table:          [6 taxa and 24 samples]
##                      taxa are rows
##                               SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV13564:Paracaedibacteraceae   -1.693732   -2.166058   -1.976644   -2.192358
## ASV14092:Roseibacillus          -1.693732   -2.166058   -1.976644   -2.192358
## ASV14296:Acinetobacter          -1.693732   -2.166058   -1.976644   -2.192358
## ASV14326:Streptococcus          -1.693732   -2.166058   -1.976644   -2.192358
## ASV15061:Flavobacterium         -1.693732   -2.166058   -1.976644   -2.192358
## ASV15299:Achromobacter          -1.693732   -2.166058   -1.976644   -2.192358
##                               SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV13564:Paracaedibacteraceae   -1.163678   -1.809827   -1.126862   -1.418989
## ASV14092:Roseibacillus          -1.163678   -1.809827   -1.126862   -1.418989
## ASV14296:Acinetobacter          -1.163678   -1.809827   -1.126862   -1.418989
## ASV14326:Streptococcus          -1.163678   -1.809827   -1.126862   -1.418989
## ASV15061:Flavobacterium         -1.163678   -1.809827   -1.126862   -1.418989
## ASV15299:Achromobacter          -1.163678   -1.809827   -1.126862   -1.418989
##                               SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV13564:Paracaedibacteraceae   -1.304547  -0.5300825   -1.796115   -1.226363
## ASV14092:Roseibacillus          -1.304547  -0.5300825   -1.796115    4.268245
## ASV14296:Acinetobacter          -1.304547  -0.5300825   -1.796115   -1.226363
## ASV14326:Streptococcus          -1.304547  -0.5300825    4.133997   -1.226363
## ASV15061:Flavobacterium         -1.304547  -0.5300825   -1.796115   -1.226363
## ASV15299:Achromobacter          -1.304547  -0.5300825   -1.796115   -1.226363
##                               SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV13564:Paracaedibacteraceae   -1.527369   -1.376554  -1.8829827   -1.615445
## ASV14092:Roseibacillus          -1.527369   -1.376554  -1.8829827   -1.615445
## ASV14296:Acinetobacter          -1.527369   -1.376554  -1.8829827    2.754044
## ASV14326:Streptococcus          -1.527369   -1.376554  -1.8829827   -1.615445
## ASV15061:Flavobacterium         -1.527369   -1.376554   0.8088769   -1.615445
## ASV15299:Achromobacter          -1.527369   -1.376554  -1.8829827    2.802231
##                               SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV13564:Paracaedibacteraceae   -1.779624   -1.271161    3.965944   -1.656262
## ASV14092:Roseibacillus          -1.779624   -1.271161   -1.853857   -1.656262
## ASV14296:Acinetobacter           2.297964   -1.271161   -1.853857   -1.656262
## ASV14326:Streptococcus          -1.779624   -1.271161   -1.853857   -1.656262
## ASV15061:Flavobacterium         -1.779624   -1.271161    3.790624   -1.656262
## ASV15299:Achromobacter           1.900937   -1.271161   -1.853857   -1.656262
##                               SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV13564:Paracaedibacteraceae    -2.19878   -1.677103   -1.322530   -1.426711
## ASV14092:Roseibacillus           -2.19878   -1.677103   -1.322530   -1.426711
## ASV14296:Acinetobacter           -2.19878   -1.677103   -1.322530   -1.426711
## ASV14326:Streptococcus           -2.19878   -1.677103   -1.322530   -1.426711
## ASV15061:Flavobacterium          -2.19878   -1.677103    2.029985   -1.426711
## ASV15299:Achromobacter           -2.19878   -1.677103   -1.322530   -1.426711
pslist$ps_WGCNA  <- ps_SL_WGCNA
pslist$clr_WGCNA <- ps_SL_WGCNA_clr

######
#Plot#
######
for (i in names(pslist)[grepl("clr_WGCNA", names(pslist))]){
  require(plyr)
  require(ggrepel) 
  require(cowplot)
  require(DESeq2)
  #if (OperatingSystem == "Mac") {quartz() }
  TSE <-mia::makeTreeSummarizedExperimentFromPhyloseq(pslist[[i]])
  tryCatch({
  g       <- paste(i); print(i)
  SSUclrPCAPlot<-pcaplotRK3(TSE,intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",ellipse = TRUE,     ellipse.prob = 0.5) + 
  scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
  scale_color_manual(values=col.Palette$SL) + atheme +
  theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
  theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
  prow <- cowplot::plot_grid(SSUclrPCAPlot, labels = c(""), ncol = 1)
  title <- ggdraw() + draw_label_themeRKwhite(paste(g), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRKwhite(paste("species_glom clr-PCA", "with",       ... = 
  length(rownames(TSE)),"bacterial species",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,   
  vjust = 1)
  SSUclrPCAPlot<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
  ggsave(SSUclrPCAPlot, filename = paste(paste(save_name_16S, "SSU", "Filter-clrPCA" , sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 7,
  height = 7)
  SSUclrPCAPlot ->> SSUclrPCAPlot
  plot(SSUclrPCAPlot)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "clr_WGCNA"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."

2.3 WGCNA

2.3.1 Data Input

#=====================================================================================
#  Code chunk 3
#=====================================================================================
library(WGCNA)
omics_data0 <- as.data.frame(otu_table(ps_SL_WGCNA_clr)) #%>% t()
dim(omics_data0 %>% paste(c("Samples", "OTUs")))
## NULL
gsg = goodSamplesGenes(omics_data0, verbose = 3);
##  Flagging genes and samples with too many missing values...
##   ..step 1
gsg$allOK
## [1] TRUE
#=====================================================================================
#  Code chunk 4
#=====================================================================================
if (!gsg$allOK)
{
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes)>0) 
     printFlush(paste("Removing genes:", paste(names(omics_data0)[!gsg$goodGenes], collapse = ", ")));
  if (sum(!gsg$goodSamples)>0) 
     printFlush(paste("Removing samples:", paste(rownames(omics_data0)[!gsg$goodSamples], collapse = ", ")));
  # Remove the offending genes and samples from the data:
  omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
}

#=====================================================================================
#  Code chunk 5
#=====================================================================================

sampleTree = hclust(dist(omics_data0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, 
     cex.axis = 1.5, cex.main = 2) 
#=====================================================================================
#  Code chunk 6
#=====================================================================================

# Plot a line to show the cut
abline(h = 100, col = "red");

# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
table(clust)
## clust
##  1 
## 24
# clust 1 contains the samples we want to keep.

#in Case we would exclude outliers from the tree: 
 #keepSamples = (clust==1)
 #omics_data = omics_data0[keepSamples, ]

recordPlot() -> SampleClusteringTreePlot

#Keep all samples
omics_data <- omics_data0
 nGenes = ncol(omics_data)
 nSamples = nrow(omics_data)

#=====================================================================================
#  Code chunk 7
#=====================================================================================

datTraits<-SAMDF_16S[,traitData]
datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
collectGarbage()

#=====================================================================================
#  Code chunk 8
#=====================================================================================
# Re-cluster samples
sampleTree2 = hclust(dist(omics_data), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits), 
                    main = "Sample dendrogram and trait heatmap")

recordPlot() -> SampleClusteringTreeTraitsPlot
#=====================================================================================
#  Code chunk 9
#=====================================================================================
save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "dataInput", Date, sep="_"), ".RData", sep=""))))

##############
#Summary Plot#
##############
cowplot::plot_grid(FilterZerosPlot , SSUclrPCAPlot, labels = c("A", "B"), ncol = 2, rel_heights = c(1,1)) -> part_1
cowplot::plot_grid(part_1 , SampleClusteringTreeTraitsPlot, labels = c("", "C"), ncol = 1) -> part_2
ggsave(part_2, filename = paste(paste(save_name_16S, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10,
  height = 15)

2.3.2 Network construction

Rosales et al., 2023 Network analysis To identify ASVs that co-associate in AH, DU, and DL samples, CLR-transformed counts were used for weighted correlation network analysis (WGCNA) with the WGCNA 1.70-3R package [76]. The network was constructed unsigned with the following parameters: power = 3, minimum module size = 12, deep split = 2, and merged cut height = 0.25. The eigenvalues were correlated to AH, DU, and DL using Pearson correlation with the R function cor. The highest correlation in each disease state was then selected for network construction using the R package SpiecEasi 1.0.5 [77]. The network was then constructed as previously reported [11]. Briefly, the Stability Approach to Regularization Selection (StARS) [77] model was chosen along with the method Meinshausen–Bühlmann’s neighborhood selection [78]. For StARS, 100 subsamples were used with a variability threshold of 10−3. The centrality (node importance) was evaluated [79] using the functions centrality_degree (neighbors = the number of adjacent edges or neighbors) and centrality_edge_betweenness (centrality = the number of shortest paths going through an edge) [80]. The package influenceR 0.1.0. [81] selected important ASVs in each network and assigned the top “key players” [38], which were labeled with their respective orders.

Jameson et al., 2023 Co-occurrence patterns between taxa with putative roles in N2O production and the rest of the microbial community ASVs were explored using proportionality analysis within the propr package52. ASV tables were trimmed to select taxa that occurred ≥10 times in at least 10% of samples prior to network-level analyses to improve interpretability and minimize the risk of spurious correlations. Pairwise interactions between individual taxa with rho values greater than 0.60 were plotted using Cytoscape v3.9.0 and network topological indices were calculated using the NetworkAnalyzer tool114. Relationships between microbial community structure and rate processes were then assessed using weighted gene correlational network analysis (WGCNA) performed with the WGCNA package115. The signed adjacency measure was first calculated for each pair of features (ASVs) by raising the absolute value of their pairwise correlation coefficients to a soft-thresholding power of 8 to maximize the scale-free topology fit. Hierarchical clustering of taxa into discrete subnetworks was completed using a minimum module size threshold of 20 and a dissimilarity threshold of 0.3. Pearson correlation coefficients and corresponding p-values are reported for correlations between sample traits, subnetwork eigengenes, and individual ASVs (Supplementary Data 1). Subnetwork membership and intranetwork connectivity measures are also reported for each ASV and were used in further analyses to assess broad relationships between ASV connectivity and importance with respect to N2O production rates.

Mach et al., 2021 To confirm the results of the DE analysis, the weighted gene co-expression network analysis (WGCNA) method was also run on the “E1” matrix using the WGCNA R package (version 1.69) (Langfelder and Horvath, 2008). The parameters for the analysis were set as follows: “corFnc” = bicor, “type” = signed hybrid, “beta” = 10, “deepSplit” = 4, “minClusterSize” = 30, “cutHeight” = 0.1. The eigengenes corresponding to each identified module were correlated individually to all the 1H NMR and biochemical assay metabolites, i.e., a set of 56 different molecules (see next paragraphs). A module was then considered positively or negatively associated to this set of molecules if the Pearson r correlation values were ≥ |0.65| for at least 5 molecules and if all the corresponding p-values were ≤ 1e−05. The positively and the negatively correlated modules defined in this way were merged to obtain a single gene list, which was subsequently compared to the differentially expressed genes (DEGs) list using a Venn diagram.

Strand et al, 2021 4.2.2. Network inference For network inference, we used the Weighted Gene Co-expression Network Analysis (WGCNA) R package [24] and the function blockwiseModules with the bicor correlation measure and parameters maxBlockSize = 10000, networkType = “signed”, TOMType = “signed”, corType = “bicor”, maxPOutliers = 0.05, replaceMissingAdjacencies = TRUE, pamStage = F, deepSplit = 4, minModuleSize = 2, minKMEtoStay = 0.5, minCoreKME = 0.5, minCoreKMESize = 2, reassignThreshold = 0 mergeCutHeight = 0.4/0.5 (for microbiota/host respectively).

Our analysis relies heavily on network modules, and hence the parameters related to module detection and trimming influence the results. Briefly, deepSplit controls the sensitivity of the module detection approach by hierarchical clustering, with a value of 1 being the least sensitive and 4 being the most sensitive. minModuleSize controls the minimum size of modules in the clustering step. Nodes with a correlation to the module eigennode (KME) lower than minKMEtoStay are trimmed from the module, and the module is deleted if it does not have a core of at least minCoreKMESize nodes (with core nodes being defined as having a KME greater than minCoreKME). Finally, different modules with eigennodes that correlate above the 1 – mergeCutHeight threshold are merged. Note that the final modules can be smaller than minModuleSize due to trimming (but not smaller than minCoreKMESize), and that they can include nodes with a KME lower than minKMEtoStay due to module merging. Our parameters were set to detect highly correlated and potentially small modules initially, thus not missing interesting profiles displayed by few genes/OTUs, and then to apply an aggressive merging threshold to avoid dealing with highly redundant modules in downstream analysis.

2.3.2.1 Pick Soft Threshold
#=====================================================================================
#  Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
#  Code chunk 2
#=====================================================================================
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1091.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1091 of 1091
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1    0.410 -2.42          0.940 237.000  229.0000  385.0
## 2      2    0.637 -2.03          0.956  79.900   72.7000  184.0
## 3      3    0.750 -1.76          0.956  34.500   28.4000  103.0
## 4      4    0.819 -1.59          0.943  17.700   13.7000   63.6
## 5      5    0.840 -1.58          0.937  10.300    7.4100   45.4
## 6      6    0.901 -1.47          0.946   6.620    4.3500   34.2
## 7      7    0.937 -1.43          0.957   4.600    2.6300   27.1
## 8      8    0.920 -1.40          0.919   3.400    1.6700   23.1
## 9      9    0.905 -1.39          0.890   2.640    1.0900   20.2
## 10    10    0.953 -1.30          0.940   2.140    0.7460   17.9
## 11    12    0.892 -1.25          0.874   1.530    0.3700   15.2
## 12    14    0.913 -1.22          0.901   1.200    0.2030   14.3
## 13    16    0.879 -1.22          0.849   0.996    0.1130   13.7
## 14    18    0.863 -1.20          0.826   0.860    0.0645   13.2
## 15    20    0.892 -1.17          0.863   0.764    0.0384   12.9
# Plot the results:
# Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
# based on the criterion of approximate scale-free topology.
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
  idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
  if(!is.infinite(idx)){
    st <- sft$fitIndices[idx,1]
  } else{
    idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
    st <- sft$fitIndices[idx,1]
  }
} else{st <- sft$fitIndices[idx,1]}
# Plot Scale independence measure and Mean connectivity measure

# Scale-free topology fit index as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
           sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>% 
  ggplot() + 
  geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
  geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
  geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
  ggtitle("Scale independence") +
  xlab("Soft Threshold (power)") +
  ylab("SF Model Fit,signed R^2") +
  xlim(1,20) +
  ylim(-1,1) +
  geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05), 
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.5)-> scale_independence_plot 
  
# Mean connectivity as a function of the soft-thresholding power

data.frame(Indices = sft$fitIndices[,1],
           meanApprox = sft$fitIndices[,5]) %>% 
  ggplot() + 
  geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
  geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
  xlab("Soft Threshold (power)") +
  ylab("Mean Connectivity") +
  geom_segment(aes(x = st-0.4, 
                   y = sft$fitIndices$mean.k.[idx], 
                   xend = 0, 
                   yend = sft$fitIndices$mean.k.[idx]),
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.4) +
  ggtitle(paste0("Mean connectivity: ", 
                 round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot

cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B")) -> si_mc_plot


ggsave(si_mc_plot, filename = paste(paste(save_name_16S, "soft_thresholding_power" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
si_mc_plot

softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
softPower <- 6
2.3.2.2 blockwiseModules

deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.

##################
#blockwiseModules#
##################
softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
network = blockwiseModules(omics_data, 
                       power = st,
                       networkType = "signed", 
                       TOMType = "signed",
                       corType = "bicor",
                       minModuleSize = 5, #for genes 30
                       reassignThreshold = 0, 
                       deepSplit = 3,
                       mergeCutHeight = 0.25,
                       numericLabels = TRUE, 
                       pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"), 
                       verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file SL_Gill_16S_Summer_TOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 212 genes from module 1 because their KME is too low.
##      ..removing 82 genes from module 2 because their KME is too low.
##      ..removing 2 genes from module 3 because their KME is too low.
##      ..removing 2 genes from module 4 because their KME is too low.
##      ..removing 5 genes from module 5 because their KME is too low.
##      ..removing 2 genes from module 6 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
#                           power = st, 
#                           networkType = "signed", 
#                           TOMType = "signed",
#                           corType = "bicor",
#                           maxPOutliers = 0.05,
#                           deepSplit = 4, # Default 2
#                           minModuleSize = 2, # 30
#                           minCoreKME = 0.5,      # Default 0.5
#                           minCoreKMESize = 2,    # Default minModuleSize/3,
#                           minKMEtoStay = 0.5,    # Default 0.3
#                           reassignThreshold = 0, # Default 1e-6
#                           mergeCutHeight = 0.4,  # Default 0.15
#                           pamStage = FALSE, 
#                           pamRespectsDendro = TRUE,
#                           replaceMissingAdjacencies = TRUE,
#                           numericLabels = TRUE,
#                           saveTOMs = TRUE,
#                           saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
#                           verbose = 3)

###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs
geneTree = network$dendrograms[[1]]

save(MEs, moduleLabels, moduleColors, geneTree,
     file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "networkConstruction-auto", Date, sep="_"), ".RData", sep=""))))

SSU_Gill_WGCNA <- list("SSU_Gill_omics_data" = omics_data, 
              "SSU_Gill_datTraits"=datTraits,
              "SSU_Gill_MEs"=MEs,
              "SSU_Gill_moduleLabels"=moduleLabels,
              "SSU_Gill_moduleColors"=moduleColors,
              "SSU_Gill_geneTree"=geneTree)

saveRDS(SSU_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List", Date, sep="_"), ".rds", sep=""))))
2.3.2.3 Module & Network Visualization
#####################
#plotDendroAndColors#
#####################

##############################
#Number of ASV in each module#
##############################
as.data.frame(table(SSU_Gill_WGCNA$SSU_Gill_moduleLabels)) %>% 
  dplyr::rename(Module = Var1, Size = Freq) %>%
  dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>% 
  ggplot(aes(x = Module, y = Size, fill = Module)) +
  geom_col(color =  "#000000") +
  ggtitle("Number of genes in each module") +
  theme(legend.position = "none") + 
  scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
  geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
  ylim(0, max(module_size$Size)*1.1) +
  theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
  coord_flip() -> ASVModuleSizePlot
ASVModuleSizePlot

ggsave(ASVModuleSizePlot, filename = paste(paste(save_name_16S, "ASVModulePlot" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]

MEs_R_noME0[upper.tri(MEs_R_noME0)] %>% 
  as.data.frame() %>% 
  dplyr::rename("correlation" = ".") %>% 
  ggplot(aes(x=correlation)) + 
  geom_histogram(bins = 20) +
  #geom_density() + 
  xlim(-1, 1) +
  ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
#MEs_R_density

MEs_R_density

#MEs_R_density

pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
                   silent = T, 
                   breaks = seq(-1,1,length.out = 101),
                   treeheight_row = 5, 
                   treeheight_col = 5,
                   main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
                   labels_row = paste0(prefix, rownames(MEs_R)),
                   labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr

ggsave(MEs_R_Corr, filename = paste(paste(save_name_16S, "MEs_R_Corr" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)


cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen
density_eigen

#######################
#Network-Visualization#
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.

# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

#############################
# Correlation within modules# from Strand et al, 2021
#############################
corr_within_module <- function(omics_data, network, module_x = 1){
  idx.omics_data <- which(network$colors == module_x)
  idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
  kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
  kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
  h <- as.numeric(sub("ME","", m))
  data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>% 
    ggplot() + 
    geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) + 
    xlim(-1, 1) +
    xlab("ASV correlation")+
    ggtitle(paste0(prefix,m)) -> da_plot
  ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]

cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot

ggsave(density_all_plot, filename = paste(paste(save_name_16S, "WithinModuleCorrelation" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 12)

##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, ASVModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,0.5)) -> part_2
cowplot::plot_grid(part_2, density_all_plot, labels = c("", "F"), rel_widths = c(1,0.1), ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_16S, "SSU_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12,
  height = 10)

2.4 Module-Trait-Correlation Heatmap

###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(SSU_Gill_WGCNA$SSU_Gill_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .)  %>% paste("SSU",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)

# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs) 

mME = mat %>%
  pivot_longer(-traits) %>%
  mutate(#name = gsub("ME", "", name),
    name = factor(name, levels = module_order))

textMatrixLong <-  as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
  pivot_longer(-traits) %>%
  mutate(
    #name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)

textMatrixLong2 <-  as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
  pivot_longer(-traits) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)

## add gene counts per module
 genesmod<- as.data.frame(moduleLabels)
 genesmod$genes <- rownames(genesmod)
 genesmod$Modules <- paste("SSU",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
 
ModCount <- as.data.frame(genesmod %>% 
  dplyr::group_by(Modules) %>% 
  dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]


HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "steelblue1",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

ggsave(A, filename = paste(paste(save_name_16S, "ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)


 
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",         
"Salinity",    
"SecchiDepth"
)) ~ "Abiotics", 
(traits %in% c(
"FultonK", 
"Length",
"Weight",
"StomachContent",
"HSI",              
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")


 
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
  geom_tile() +
    scale_fill_gradientn(
      colours = c("steelblue1", "white", "red"),
      limit = c(-1,1), 
      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
  
  scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
  facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 WGCNA_ModuleHeatmap <- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(WGCNA_ModuleHeatmap)

ggsave(WGCNA_ModuleHeatmap, filename = paste(paste(save_name_16S, "ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 7, height = 6) 
      
  
cowplot::plot_grid(WGCNA_ModuleHeatmap, AbioticsLinePlot, labels = c("A", "B"), rel_widths = c(1,0.5), ncol = 2) -> part_4
ggsave(part_4, filename = paste(paste(save_name_16S, "SSU_HmapAbiotics", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 7)

##################
#Specific Modules#
##################
 #  Module <- "darkturquoise"
 #  HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
 #  geom_tile() +
 #    scale_fill_gradientn(
 #      colours = c("steelblue1", "white", "red"),
 #      limit = c(-1,1), 
 #      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
 #  # scale_fill_gradient2(
 #  #   low = "steelblue1",
 #  #   high = "red",
 #  #   mid = "white",
 #  #   midpoint = 0,
 #  #   limit = c(-1,1)) 
 #  #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
 #  theme(axis.text.x = element_text(angle=90)) +
 #  labs(title = "", y = "Modules", fill="corr") +
 #  geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2), 
 #                      size=3, colour="grey20") +
 #  geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2), 
 #                      colour="grey20", size=3) +
 #   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
 #      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
 #      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 # A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
 #      plot(A)
 #      ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots, 
 #             device='png', dpi=300, width = 2.8,height = 7)

2.6 Dataframe Module-ASVs

#############################################
#Create Bacterial Counts and Reseq Dataframe#
#############################################  

TAX <- as.data.frame(tax_table(pslist$ps_SL_21))
OTU <- as.data.frame(t(otu_table(pslist$ps_SL_21)))
OTU$ASV <- rownames(OTU)
REFSEQ <- refseq(pslist$ps_SL_21)
REFSEQ <- stack(as.character(REFSEQ, use.names=TRUE))
colnames(REFSEQ)[colnames(REFSEQ) == "ind"] <- "ASV"
SSUData <- TAX %>% rownames_to_column("RowName") %>%
          left_join(OTU %>% rownames_to_column("RowName"), by = "RowName") %>%
          column_to_rownames("RowName")
SSUData  <- left_join(SSUData , REFSEQ)
rownames(SSUData) <- SSUData$ASV
#Count species per Sample
#ddply(SSUData ,.(ASV),numcolwise(sum))

############################################################################
#Create WGCNA Dataframe: Species, ModuleMembership, Correlation to Abiotics#
############################################################################
head(SSU_Gill_WGCNA$SSU_Gill_MEs)
##                     ME4          ME1        ME10        ME9         ME2
## SLSU21BBEB7  0.12148982  0.008285158 -0.03329939 -0.1959358 -0.15397248
## SLSU21MGEB7  0.05778488  0.361649593  0.36861328 -0.2785649 -0.22050189
## SLSU21MLEB9 -0.41317559 -0.259446826 -0.18140169 -0.2669234  0.37503141
## SLSU21SSEB9 -0.15255770 -0.180701201 -0.30363879 -0.2616119 -0.18207038
## SLSU21BBEB1  0.27275518  0.080016048  0.05260033 -0.1937149 -0.06962721
## SLSU21BBEB2  0.11108720  0.050941806  0.29121502 -0.2333171 -0.16882699
##                    ME3         ME6       ME11         ME12         ME8
## SLSU21BBEB7 -0.1102952  0.26924243  0.0857673 -0.248408637 -0.08334826
## SLSU21MGEB7 -0.2788859  0.42896168  0.3807363 -0.268125703 -0.30449636
## SLSU21MLEB9  0.3155212  0.31700626  0.2737803 -0.001790919  0.03665652
## SLSU21SSEB9  0.1438393  0.47185604  0.3614075  0.112479044  0.41489712
## SLSU21BBEB1 -0.1539043 -0.21238201  0.2393287 -0.136719927 -0.08523905
## SLSU21BBEB2 -0.1837196 -0.08721782 -0.1664743  0.205956955  0.27067429
##                     ME5          ME7         ME0
## SLSU21BBEB7  0.20722219  0.007386758 -0.24211297
## SLSU21MGEB7  0.06631932 -0.051195278 -0.18526957
## SLSU21MLEB9  0.09759643  0.090743867 -0.11656118
## SLSU21SSEB9  0.35840553  0.254344995 -0.47273731
## SLSU21BBEB1 -0.15235473 -0.112350624 -0.01538266
## SLSU21BBEB2  0.18114820  0.066572246 -0.13896766
head(SSU_Gill_WGCNA$SSU_Gill_moduleLabels)
##    ASV1:Elizabethkingia        ASV2:Citrobacter ASV3:Enterobacteriaceae 
##                       4                       4                       4 
## ASV4:Enterobacteriaceae       ASV5:Enterobacter ASV6:Enterobacteriaceae 
##                       4                       4                       4
MEs          <- SSU_Gill_WGCNA$SSU_Gill_MEs
omics_data   <- SSU_Gill_WGCNA$SSU_Gill_omics_data
datTraits    <- SSU_Gill_WGCNA$SSU_Gill_datTraits
moduleLabels <- SSU_Gill_WGCNA$SSU_Gill_moduleLabels
modNames     <- names(MEs)
nSamples     <- nrow(omics_data)

TraitOfInterest <- "O2"
DatTraitOfInterest <- as.data.frame(datTraits[TraitOfInterest])

SSUWGCNAlist <- list()
for (i in names(MEs)) {

  a <- length(SSUWGCNAlist)
  ModuleOfInterst <- paste(sub("ME", "", i))
  
  geneModuleMembership = as.data.frame(cor(omics_data, MEs, use = "p"));
  MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
  names(geneModuleMembership) = paste("MM", modNames, sep="");
  names(MMPvalue) = paste("p.MM", modNames, sep="");

  geneTraitSignificance = as.data.frame(cor(omics_data, DatTraitOfInterest, use = "p"));
  GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
  names(geneTraitSignificance) = paste("GS.", names(DatTraitOfInterest), sep="");
  names(GSPvalue) = paste("p.GS.", names(DatTraitOfInterest), sep="");

  
  OmicsPerModule <- as.data.frame(names(moduleLabels[moduleLabels == ModuleOfInterst]))
  names(OmicsPerModule) <- paste("ME", ModuleOfInterst, sep="")
  rownames(OmicsPerModule) <- OmicsPerModule[,1]
 
  SSUWGCNAdata <- OmicsPerModule %>% rownames_to_column("RowName") %>%
          left_join(geneModuleMembership[rownames(OmicsPerModule),, drop = FALSE][names(geneModuleMembership) %in%
          paste("MMME", ModuleOfInterst, sep="")] %>% rownames_to_column("RowName"), by = "RowName") %>%
          column_to_rownames("RowName")
  SSUWGCNAdata  <- SSUWGCNAdata  %>% rownames_to_column("RowName") %>%
          left_join(geneTraitSignificance[rownames(OmicsPerModule),, drop = FALSE][names(geneTraitSignificance) %in%
          paste("GS.", TraitOfInterest, sep="")] %>% rownames_to_column("RowName"), by = "RowName") %>%
          column_to_rownames("RowName")

  SSUWGCNAdata  <- SSUWGCNAdata  %>% rownames_to_column("RowName") %>%
          left_join(SSUData[rownames(SSUData) %in% rownames(SSUWGCNAdata),] %>% rownames_to_column("RowName"), 
          by = "RowName") %>% column_to_rownames("RowName")
  
  #Reoder by kME/Module Membership
  SSUWGCNAdata <- SSUWGCNAdata %>%
            dplyr::arrange(desc(SSUWGCNAdata[paste("MMME", ModuleOfInterst, sep="")]))

  SSUWGCNAlist[[a+1]] <- SSUWGCNAdata
  
  names(SSUWGCNAlist)[[a+1]] <- paste("SSU",ModuleOfInterst, sep="")
  
  write.csv2(SSUWGCNAdata, paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, 
            paste("SSU",ModuleOfInterst, sep=""), Date, sep="_"),".csv", sep=""))))
  
}

SSUWGCNAlist$All <- SSUData

saveRDS(SSUWGCNAlist, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List_2", Date, sep="_"), ".rds", sep=""))))

#Export ASVs as Fasta 
# pslist$ps_SL %>%
#       refseq() %>%
#       Biostrings::writeXStringSet(paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23.asv.fna")), append=FALSE,
#                                   compress=FALSE, compression_level=NA, format="fasta")
# fasta_sequences <- Biostrings::readDNAStringSet(paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23.asv.fna")))
# target_names    <- SSUWGCNAlist$SSU8$ASV
# # Subset the sequences based on the target names
# subset_sequences <- fasta_sequences[names(fasta_sequences) %in% target_names]
# Biostrings::writeXStringSet(subset_sequences, paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23_SSU8.asv.fna")))

#######################################
#Extract and Inspect Sequences on NCBI#
#######################################  
  # minTotRelAbun = 0.001
  # x = taxa_sums(pslist$ps_Mock)
  # keepTaxa = which((x / sum(x)) > minTotRelAbun)
  # prunedSet = prune_taxa(as.character(keepTaxa), pslist$ps_Mock)
  # 
  # prunedSet %>%
  #     refseq() %>%
  #     Biostrings::writeXStringSet("~/asv.fna", append=FALSE,
  #                                 compress=FALSE, compression_level=NA, format="fasta")

2.7 Taxa Cluster-Heatmap

SSU_Gill_WGCNA <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List", Date, sep="_"), ".rds", sep=""))))

SSU_Gill_WGCNA_list <- list("SSU_Gill_WGCNA" = SSU_Gill_WGCNA)

#################################
#Cluster-Heatmap of Module Genes# in a loop for correlation higher 3
#################################
ExpressionSet <- as.data.frame(SSU_Gill_WGCNA$SSU_Gill_omics_data)
moduleLabels  <- SSU_Gill_WGCNA$SSU_Gill_moduleLabels
InterestingComparison <- as.data.frame(names(SSU_Gill_WGCNA$SSU_Gill_MEs))

for (i in names(SSU_Gill_WGCNA_list)[grepl("SSU_Gill_WGCNA", names(SSU_Gill_WGCNA_list))]) {
  #https://github.com/kevinblighe/E-MTAB-6141
  tryCatch({
    
    g           <- paste(i)
    gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i); print(gg)

    for (MODULE in unique(as.character(InterestingComparison$name))) {
      tryCatch({
        
      genes_of_interest <- names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])
      print(MODULE)
      #print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))  
      
      FILENAME    <- paste(paste(save_name_16S, "Heatplot", sub("ME", "SSU", MODULE), sep="_"),".png", sep="")
      FILENAME2   <- paste(paste(save_name_16S, "Heatplot", sub("ME", "SSU", MODULE), "NoClust",sep="_"),".png", sep="")
      
      TITLE       <- draw_label_themeRKwhite(paste(Species, gg, Type, MODULE),
                                    element = "plot.title", x =  0.05, hjust = 0, vjust = 1)
      #For any unknown reason gene names like trnat-ugu_39 get changes by WGCNA to trnat.ugu_39

      rowclusternum  <- 1
      columnclusternum    <- 1

      require(DESeq2)
      require(tidyverse)
      require(ggplot2)

    BacteriaHeatPlotRKnoClust_SL(res = res, clr = ExpressionSet, Species = Species, min_count = 10,
                                        genes_of_interest = genes_of_interest, Samples = Samples_SSU_Gill, 
                                        SAMDF = SAMDF_16S,  TITLE = TITLE, filename= FILENAME, OutlineColor = "grey20")

    if (MODULE == "ME2") {
      plot(A)
    } else {
    print("Plots are saved to the pathPlots")
    }
    
    
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "Gill"
## [1] "ME4"
## [1] "Plots are saved to the pathPlots"
## [1] "ME1"
## [1] "Plots are saved to the pathPlots"
## [1] "ME10"
## [1] "Plots are saved to the pathPlots"
## [1] "ME9"
## [1] "Plots are saved to the pathPlots"
## [1] "ME2"
## [1] "ME3"
## [1] "Plots are saved to the pathPlots"
## [1] "ME6"
## [1] "Plots are saved to the pathPlots"
## [1] "ME11"
## [1] "Plots are saved to the pathPlots"
## [1] "ME12"
## [1] "Plots are saved to the pathPlots"
## [1] "ME8"
## [1] "Plots are saved to the pathPlots"
## [1] "ME5"
## [1] "Plots are saved to the pathPlots"
## [1] "ME7"
## [1] "Plots are saved to the pathPlots"
## [1] "ME0"

## [1] "Plots are saved to the pathPlots"

#-

2.8 SSU Function Description

Here I did a Picrust2 analysis but it was not very informative, I guess there are just to many unknown species in Fish gills

2.8.1 Pathogenes from Literature

#Austin, B., Austin, D. A., & Munn, C. B. (2007). Bacterial fish pathogens: disease of farmed and wild fish (Vol. 26, p. 552). Chichester: Springer.
PathogeneSpecies <-
  c("Clostridium botulinum",
"Eubacterium tarantellae",
"Carnobacterium maltaromaticum",
"Enterococcus faecalis",
"Vagococcus salmoninarum", 
"Lactobacillus" ,
"Lactococcus garvieae", 
"Enterococcus seriolicida",
"Lactococcus piscium", 
"Streptococcus dysgalactiae", 
"Streptococcus agalactiae", 
"Streptococcus ictaluri",
"Streptococcus parauberis", 
"Streptococcus phocae",
"Renibacterium salmoninarum",
"Erysipelothrix rhusiopathiae", 
"Bacillus",
"Bacillus cereus",
"Bacillus mycoides",
"Corynebacterium aquaticum", 
"Weissella", 
"Weissella ceti", 
"Micrococcus", 
"Mycobacterium", 
"Mycobacterium abscessus",
"Mycobacterium anabanti",
"Mycobacterium avium",
"Mycobacterium chelonei", 
"Mycobacterium fortuitum",
"Mycobacterium gordonae",
"Mycobacterium marinum",
"Mycobacterium montefiorense",
"Mycobacterium neoaurum",
"Mycobacterium piscium",
"Mycobacterium platypoecilus",
"Mycobacterium poriferae",
"Mycobacterium pseudoshottsii",
"Mycobacterium ranae",
"Mycobacterium salmoniphilum",
"Mycobacterium shottsii",
"Mycobacterium scrofulaceum",
"Mycobacterium simiae",
"Mycobacterium smegmatis",
"Mycobacterium ulcerans",
"Nocardia asteroides",
"Nocardia salmonicida",
"Nocardia seriolae",
"Rhodococcus",
"Rhodococcus erythropolis",
"Rhodococcus qingshengii",
"Planococcus", 
"Staphylococcus epidermidis", 
"Staphylococcus warneri", 
"Aeromonas allosaccharophila",
"Aeromonas bestiarum",
"Aeromonas caviae", 
"Aeromonas dhakensis", 
"Aeromonas hydrophila",
"Aeromonas liquefaciens",
"Aeromonas punctata",
"Aeromonas jandaei",
"Aeromonas piscicola", 
"Aeromonas salmonicida", 
"Aeromonas sobria", 
"Aeromonas schubertii", 
"Aeromonas veronii", 
"Pseudoalteromonas piscicida", 
"Pseudoalteromonas undina", 
"Shewanella putrefaciens", 
"Arcobacter cryaerophilus", 
"Delftia acidovorans", 
"Citrobacter freundii", 
"Edwardsiella anguillarum", 
"Edwardsiella ictaluri", 
"Edwarsiella piscicida", 
"Edwardsiella tarda", 
"Enterobacter cloacae", 
"Plesiomonas shigelloides", 
"Pantoea",
"Enterobacter agglomerans", 
"Providencia rettgeri",
"Proteus rettgeri",
"Providencia vermicola", 
"Salmonella enterica", 
"Serratia liquefaciens", 
"Serratia plymuthica", 
"Yersinia ruckeri", 
"Chryseobacterium aahli",
"Chryseobacterium balustinum", 
"Flavobacterium balustinum",
"Chryseobacterium indologenes", 
"Chryseobacterium scophthalmum",
"Chryseobacterium piscicola",
"Flavobacterium scophthalmum",
"Flavobacterium branchiophilum", 
"Flavobacterium columnare", 
"Flexibacter columnaris",
"Cytophaga columnaris" ,
"Flavobacterium hydatis",
"Cytophaga aquatilis",
"Flavobacterium johnsoniae" ,
"Cytophaga johnsonae",
"Flavobacterium oncorhynchi" ,
"Flavobacterium psychrophilum",
"Cytophaga psychrophile",
"Flavobacterium spartensis" ,
"Flavobacterium succinicans" ,
"Flectobacillus roseus" ,
"Tenacibaculum dicentrarchi", 
"Tenacibaculum discolor" ,
"Tenacibaculum gallaicum", 
"Tenacibaculum maritimum",
"Flexibacter maritimus",
"Tenacibaculum ovolyticum" ,
"Flexibacter ovolyticus",
"Tenacibaculum soleae" ,
"Francisella noatunensis" ,
"Francisella philomiragia",
"Francisella piscicida",
"Hahella chejuensis" ,
"Halomonas cupida",
"Deleya cupida" ,
"Acinetobacter johnsonii" ,
"Acinetobacter iwoffi" ,
"Moritella marina" ,
"Moritella viscosa" ,
"Mycoplasma" ,
"Myxococcus piscicola" ,
"Aquaspirillum",
"Janthinobacterium lividum ",
"Pasteurella skyensis" ,
"Piscirickettsia salmonis ",
"Pseudomonas aeruginosa ",
"Pseudomonas alcaligenes" ,
"Pseudomonas baetica" ,
"Pseudomonas chlororaphis ",
"Pseudomonas fluorescens" ,
"Pseudomonas koreensis" ,
"Pseudomonas mosselii" ,
"Pseudomonas plecoglossicida" ,
"Pseudomonas pseudoalcaligenes" ,
"Pseudomonas putida" ,
"Photobacterium damselae", 
"Pasteurella piscicida",
"Aliivibrio fischeri" ,
"Aliivibrio logei" ,
"Aliivibrio salmonicida" ,
"Aliivibrio wodanis" ,
"Vibrio aestuarianus" ,
"Vibrio alginolyticus" ,
"Vibrio anguillarum" ,
"Listonella anguillarum",
"Vibrio furnissii",
"Vibrio harveyi",
"Vibrio carchariae" ,
"Vibrio trachuri" ,
"Vibrio ichthyoenteri",
"Vibrio mimicus" ,
"Vibrio ordalii",
"Vibrio parahaemolyticus",
"Vibrio ponticus", 
"Vibrio scophthalmi" ,
"Vibrio splendidus" ,
"Vibrio tapetis" ,
"Vibrio vulnifi" ,
"Candidatus ctinochlamydia clariae",
"Candidatus Arthromitus",
"Candidatus Branchiomonas cisticola",
"Candidatus Clavochlamydia salmonicola",
"Candidatus Piscichlamydia salmonis",
"Candidatus Renichlamydia lutjanid",
"Candidatus Similichlamydia latridicola",
"Candidatus Syngnamydia Venezia",
"Streptobacillus" ,
"Varracalbmi", 
"Candidatus Megaira") #Added by me


PathogeneGenus <-c(
  "Clostridium",
  "Eubacterium",
  "Carnobacterium",
  "Enterococcus",
  "Vagococcus", 
  "Lactobacillus" ,
  "Lactococcus", 
  "Enterococcus",
  "Streptococcus",
  "Renibacterium",
  "Erysipelothrix", 
  "Bacillus",
  "Corynebacterium", 
  "Weissella", 
  "Micrococcus", 
  "Mycobacterium", 
  "Nocardia",
  "Rhodococcus",
  "Planococcus", 
  "Staphylococcus", 
  "Aeromonas",
  "Pseudoalteromonas", 
  "Shewanella", 
  "Arcobacter", 
  "Delftia", 
  "Citrobacter", 
  "Edwardsiella", 
  "Enterobacter", 
  "Plesiomonas", 
  "Pantoea",
  "Enterobacter", 
  "Providencia",
  "Salmonella", 
  "Serratia", 
  "Yersinia", 
  "Chryseobacterium", 
  "Flavobacterium", 
  "Flexibacter",
  "Cytophaga",
  "Flectobacillus" ,
  "Tenacibaculum", 
  "Francisella" ,
  "Hahella",
  "Halomonas",
  "Deleya" ,
  "Acinetobacter" ,
  "Moritella" ,
  "Mycoplasma" ,
  "Myxococcus" ,
  "Aquaspirillum",
  "Janthinobacterium",
  "Pasteurella" ,
  "Piscirickettsia",
  "Pseudomonas",
  "Photobacterium", 
  "Pasteurella",
  "Aliivibrio" ,
  "Vibrio" ,
  "Listonella",
  "Candidatus",
  "Streptobacillus" ,
"Varracalbmi",
"Candidatus Megaira",
"Candidatus ctinochlamydia",
"Candidatus Arthromitus",
"Candidatus Branchiomonas",
"Candidatus Clavochlamydia",
"Candidatus Piscichlamydia",
"Candidatus Renichlamydia",
"Candidatus Similichlamydia",
"Candidatus Syngnamydia")

for (i in names(pslist)[grepl("ps_SLWF_21", names(pslist))]){
  require(tidyverse)
  require(phyloseq)
  require(cowplot)
  tryCatch({
  g       <- paste(i); print(g)
  gg      <- sub('ps_', '', g)
  FILENAME    <- paste(paste(save_name_16S, "Pathogenes_Genus_From_Literature", sep="_"),".png", sep="")

  sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
  
  Aps_alpha_barplot <- pslist[[i]] %>%
  #tax_glom(taxrank = "Genus") %>%                     #merges species that have the same taxonomy at a certain rank
  transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
  psmelt() %>%                                         # Melt to long format
  #filter(Abundance > 0.05) %>%                         # Filter out low abundance taxa
  arrange(Genus)                                      # Sort data frame alphabetically by phylum

  Aps_alpha_barplot$Reps <- sub(Species, "", Aps_alpha_barplot$Replicate2)  
  
  Aps_alpha_barplot$ASV <- sub(".*:", "", Aps_alpha_barplot$OTU)  
  Aps_alpha_barplot$ASV <- sub('\\.', ' ', Aps_alpha_barplot$ASV)
  #print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% PathogeneGenus,] #PathogeneSpecies
  #print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
  Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
  #https://gist.github.com/svigneau/05148a7031172c2bc70d
  
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))

  Aps_alpha_barplot_Pathogens$Labels <- labels
  print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 5))

  df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
              OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
  
  df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
  Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance, 
                                                                  decreasing = TRUE), ]
  top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
  
  LocOrderSL=c(
  "Medem Grund","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch") 

    microbiomeseq_cols <- function(){
    colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
               "#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
               "#FFFF00",
                grey.colors(1000));return(colours)}
    colours <-   microbiomeseq_cols() 
    
    RepOrder <-c( 
    "WF","SU21MG","SU21BB", "SU21SS", "SU21ML")
    
   # if (gg == "GC") {
   #   RepOrder <- GCOrder
   #   } else if (gg == "OE") {
   #   RepOrder <- OEOrder
   #   } else if (gg == "SL") {
   #     RepOrder <- SLOrder }
  df <- Aps_alpha_barplot_Pathogens
 
  p <-ggplot(df, 
            aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) + 
  geom_bar(stat = "identity") +
  facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
  #scale_x_discrete(labels = LocOrder) + 
  atheme2 +
  ylab("Relative Abundance [%] \n") + xlab("Sample") +
  #scale_color_manual(values= col.Palette.species2)+
  #scale_fill_manual(values= col.Palette.species2)
  ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
  scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
  ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
   inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
  
  atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(axis.text.x=element_blank(),
        #axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank() 
        #axis.title.x = element_blank()
        ) +
  theme(
    panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    #panel.grid.major = element_blank(), #remove major gridlines
    #panel.grid.minor = element_blank(), #remove minor gridlines
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent') #transparent legend panel
  ) +
  theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold"))
  
  g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(col.Palette[[Species]], 0.7)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
  prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
  AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
  ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
  height = 8)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ps_SLWF_21"
##   Labels ReadNum    SampleID                    OTU
## 1  16681   16681 SLSU21BBEB1   ASV132:Acinetobacter
## 2          16681 SLSU21BBEB1 ASV14296:Acinetobacter
## 3          16681 SLSU21BBEB1   ASV159:Acinetobacter
## 4          16681 SLSU21BBEB1  ASV1630:Acinetobacter
## 5          16681 SLSU21BBEB1  ASV1799:Acinetobacter
GroupOfInterest <- c("ASV13:Acinetobacter.lwoffii",
"ASV24:Acinetobacter",
"ASV25:Polynucleobacter",
"ASV40:Acinetobacter.johnsonii",
"ASV50:Candidatus Megaira",
"ASV213:Aeromonas",
"ASV218:Shewanella.putrefaciens",
"ASV1670:Pseudomonas.putida",
"ASV1718:Shewanella",
"ASV2290:Chryseobacterium.haifense")
  
  for (i in names(pslist)[grepl("ps_WGCNA", names(pslist))]){
    require(tidyverse)
    require(phyloseq)
    require(cowplot)
    require(plyr)
    tryCatch({
      g       <- paste(i); print(g)
      gg      <- sub('ps_', '', g)
      FILENAME    <- paste(paste(save_name_16S, "Pathogenic_ASVs_Genus_Level", sep="_"),".png", sep="")
      
      sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
  
  Aps_alpha_barplot <- pslist[[i]] %>%
  #tax_glom(taxrank = "Genus") %>%                      #merges species that have the same taxonomy at a certain rank
  transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
  psmelt() #%>%                                         # Melt to long format
  #filter(Abundance > 0.05) %>%                         # Filter out low abundance taxa
  #arrange(Species)                                      # Sort data frame alphabetically by phylum

  Aps_alpha_barplot$Reps <- sub(gg, "", Aps_alpha_barplot$Replicates)  
  Aps_alpha_barplot$ASV <- sub(".*:", "", Aps_alpha_barplot$OTU)  
  Aps_alpha_barplot$ASV <- sub('\\.', ' ', Aps_alpha_barplot$ASV)
  #print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)

  GroupOfInterestSpecies <- sub(".*:", "", GroupOfInterest) 
  GroupOfInterestSpecies <-sub('\\.', ' ', GroupOfInterestSpecies)
  
  ######################
  #PathogeneSpecies ASV#
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% GroupOfInterestSpecies,] 
  
  ################
  #For Genus glom#
   #Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$Genus %in% GroupOfInterestSpecies,] 
  
  
  #print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
  Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
  #https://gist.github.com/svigneau/05148a7031172c2bc70d
  
  SLOrder <-c( 
    "SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
    
   if (gg == "GC") {
     RepOrder <- GCOrder
     } else if (gg == "OE") {
     RepOrder <- OEOrder
     } else if (gg == "SL") {
       RepOrder <- SLOrder }
  
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))

  Aps_alpha_barplot_Pathogens$Labels <- labels
  print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 100))

  df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
              OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
  
  df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
  Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance, 
                                                                  decreasing = TRUE), ]
  top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
  
  LocOrderSL=c(
  "Medem Grund","Oste","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch") 

    microbiomeseq_cols <- function(){
    colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
               "#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
               "#FFFF00",
                grey.colors(1000));return(colours)}
    colours <-   microbiomeseq_cols() 
    RepOrder <-  SLOrder
    
  df <- Aps_alpha_barplot_Pathogens
  p<-ggplot(df, 
            aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) + 
  geom_bar(stat = "identity") +
  facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
  #scale_x_discrete(labels = LocOrder) + 
  atheme2 +
  ylab("Relative Abundance [%] \n") + xlab("Sample") +
  #scale_color_manual(values= col.Palette.species2)+
  #scale_fill_manual(values= col.Palette.species2)
  ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
  scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
  ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
   inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
 
  atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank()) +
  theme(panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent')) + #transparent legend panel
  theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold"))

  g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(col.Palette[[Species]], 0.7)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
  prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
  AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
  ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
  height = 8)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}  
## [1] "ps_WGCNA"
##     Labels ReadNum    SampleID                               OTU
## 1    16628   16628 SLSU21BBEB1                  ASV227:Aeromonas
## 2            16628 SLSU21BBEB1                  ASV211:Aeromonas
## 3            16628 SLSU21BBEB1                  ASV330:Aeromonas
## 4            16628 SLSU21BBEB1         ASV520:Candidatus Megaira
## 5            16628 SLSU21BBEB1       ASV13:Acinetobacter.lwoffii
## 6            16628 SLSU21BBEB1           ASV130:Polynucleobacter
## 7            16628 SLSU21BBEB1               ASV10179:Shewanella
## 8            16628 SLSU21BBEB1       ASV10460:Candidatus Megaira
## 9            16628 SLSU21BBEB1         ASV10679:Polynucleobacter
## 10           16628 SLSU21BBEB1          ASV1123:Polynucleobacter
## 11           16628 SLSU21BBEB1        ASV1140:Candidatus Megaira
## 12           16628 SLSU21BBEB1                 ASV1150:Aeromonas
## 13           16628 SLSU21BBEB1                ASV1240:Shewanella
## 14           16628 SLSU21BBEB1         ASV12665:Polynucleobacter
## 15           16628 SLSU21BBEB1              ASV132:Acinetobacter
## 16           16628 SLSU21BBEB1            ASV14296:Acinetobacter
## 17           16628 SLSU21BBEB1                 ASV146:Shewanella
## 18           16628 SLSU21BBEB1                ASV1490:Shewanella
## 19           16628 SLSU21BBEB1          ASV1502:Polynucleobacter
## 20           16628 SLSU21BBEB1                  ASV158:Aeromonas
## 21           16628 SLSU21BBEB1              ASV159:Acinetobacter
## 22           16628 SLSU21BBEB1             ASV1630:Acinetobacter
## 23           16628 SLSU21BBEB1                ASV1632:Shewanella
## 24           16628 SLSU21BBEB1        ASV1693:Pseudomonas.putida
## 25           16628 SLSU21BBEB1                ASV1734:Shewanella
## 26           16628 SLSU21BBEB1             ASV1799:Acinetobacter
## 27           16628 SLSU21BBEB1           ASV180:Polynucleobacter
## 28           16628 SLSU21BBEB1             ASV1841:Acinetobacter
## 29           16628 SLSU21BBEB1             ASV1843:Acinetobacter
## 30           16628 SLSU21BBEB1             ASV1948:Acinetobacter
## 31           16628 SLSU21BBEB1    ASV195:Acinetobacter.johnsonii
## 32           16628 SLSU21BBEB1                ASV2142:Shewanella
## 33           16628 SLSU21BBEB1             ASV2175:Acinetobacter
## 34           16628 SLSU21BBEB1    ASV218:Shewanella.putrefaciens
## 35           16628 SLSU21BBEB1    ASV221:Shewanella.putrefaciens
## 36           16628 SLSU21BBEB1        ASV2262:Candidatus Megaira
## 37           16628 SLSU21BBEB1 ASV2329:Chryseobacterium.haifense
## 38           16628 SLSU21BBEB1               ASV24:Acinetobacter
## 39           16628 SLSU21BBEB1            ASV25:Polynucleobacter
## 40           16628 SLSU21BBEB1      ASV259:Acinetobacter.lwoffii
## 41           16628 SLSU21BBEB1                ASV3013:Shewanella
## 42           16628 SLSU21BBEB1                 ASV311:Shewanella
## 43           16628 SLSU21BBEB1            ASV32:Polynucleobacter
## 44           16628 SLSU21BBEB1                ASV3268:Shewanella
## 45           16628 SLSU21BBEB1              ASV331:Acinetobacter
## 46           16628 SLSU21BBEB1                 ASV3328:Aeromonas
## 47           16628 SLSU21BBEB1              ASV382:Acinetobacter
## 48           16628 SLSU21BBEB1          ASV3932:Polynucleobacter
## 49           16628 SLSU21BBEB1     ASV40:Acinetobacter.johnsonii
## 50           16628 SLSU21BBEB1                 ASV413:Shewanella
## 51           16628 SLSU21BBEB1             ASV4141:Acinetobacter
## 52           16628 SLSU21BBEB1                 ASV415:Shewanella
## 53           16628 SLSU21BBEB1          ASV4290:Polynucleobacter
## 54           16628 SLSU21BBEB1              ASV438:Acinetobacter
## 55           16628 SLSU21BBEB1                  ASV485:Aeromonas
## 56           16628 SLSU21BBEB1             ASV4863:Acinetobacter
## 57           16628 SLSU21BBEB1                ASV4873:Shewanella
## 58           16628 SLSU21BBEB1          ASV49:Candidatus Megaira
## 59           16628 SLSU21BBEB1                ASV5011:Shewanella
## 60           16628 SLSU21BBEB1                ASV5095:Shewanella
## 61           16628 SLSU21BBEB1                 ASV579:Shewanella
## 62           16628 SLSU21BBEB1             ASV6047:Acinetobacter
## 63           16628 SLSU21BBEB1           ASV611:Polynucleobacter
## 64           16628 SLSU21BBEB1          ASV6554:Polynucleobacter
## 65           16628 SLSU21BBEB1    ASV666:Acinetobacter.johnsonii
## 66           16628 SLSU21BBEB1        ASV6742:Candidatus Megaira
## 67           16628 SLSU21BBEB1                 ASV708:Shewanella
## 68           16628 SLSU21BBEB1              ASV768:Acinetobacter
## 69           16628 SLSU21BBEB1              ASV770:Acinetobacter
## 70           16628 SLSU21BBEB1                ASV7890:Shewanella
## 71           16628 SLSU21BBEB1              ASV798:Acinetobacter
## 72           16628 SLSU21BBEB1                  ASV806:Aeromonas
## 73           16628 SLSU21BBEB1                 ASV837:Shewanella
## 74           16628 SLSU21BBEB1                  ASV846:Aeromonas
## 75           16628 SLSU21BBEB1    ASV939:Acinetobacter.johnsonii
## 76           16628 SLSU21BBEB1               ASV95:Acinetobacter
## 77           16628 SLSU21BBEB1           ASV956:Polynucleobacter
## 78           16628 SLSU21BBEB1           ASV964:Polynucleobacter
## 79           16628 SLSU21BBEB1    ASV977:Acinetobacter.johnsonii
## 80           16628 SLSU21BBEB1              ASV980:Acinetobacter
## 81           16628 SLSU21BBEB1                  ASV981:Aeromonas
## 82           16628 SLSU21BBEB1          ASV99:Candidatus Megaira
## 83   23860   23860 SLSU21BBEB2                  ASV211:Aeromonas
## 84           23860 SLSU21BBEB2       ASV13:Acinetobacter.lwoffii
## 85           23860 SLSU21BBEB2         ASV520:Candidatus Megaira
## 86           23860 SLSU21BBEB2                  ASV158:Aeromonas
## 87           23860 SLSU21BBEB2                  ASV227:Aeromonas
## 88           23860 SLSU21BBEB2              ASV382:Acinetobacter
## 89           23860 SLSU21BBEB2              ASV159:Acinetobacter
## 90           23860 SLSU21BBEB2                  ASV330:Aeromonas
## 91           23860 SLSU21BBEB2     ASV40:Acinetobacter.johnsonii
## 92           23860 SLSU21BBEB2                ASV1734:Shewanella
## 93           23860 SLSU21BBEB2               ASV24:Acinetobacter
## 94           23860 SLSU21BBEB2           ASV180:Polynucleobacter
## 95           23860 SLSU21BBEB2            ASV25:Polynucleobacter
## 96           23860 SLSU21BBEB2          ASV49:Candidatus Megaira
## 97           23860 SLSU21BBEB2               ASV10179:Shewanella
## 98           23860 SLSU21BBEB2       ASV10460:Candidatus Megaira
## 99           23860 SLSU21BBEB2         ASV10679:Polynucleobacter
## 100          23860 SLSU21BBEB2          ASV1123:Polynucleobacter

2.8.2 ASV level Plot form

GroupOfInterest <- c(
"ASV15:Verticiella",
"ASV1432:Verticiella",
"ASV306:Shewanella.baltica",
"ASV1632:Shewanella",
"ASV708:Shewanella",
"ASV413:Shewanella",
"ASV146:Shewanella",
"ASV485:Aeromonas",
"ASV977:Acinetobacter.johnsonii",
"ASV195:Acinetobacter.johnsonii",
"ASV95:Acinetobacter",
"ASV1105:Pseudomonas",
"ASV221:Shewanella.putrefaciens",
"ASV146:Shewanella",
"ASV846:Aeromonas",
"ASV806:Aeromonas",
"ASV3501:Chryseobacterium.piscicola",
"ASV306:Shewanella.baltica",
"ASV415:Shewanella"
)
  
for (i in names(pslist)[grepl("ps_WGCNA", names(pslist))]){
    require(tidyverse)
    require(phyloseq)
    require(cowplot)
    require(plyr)
    tryCatch({
      g       <- paste(i); print(g)
      gg      <- sub('ps_', '', g)
      FILENAME    <- paste(paste(save_name_16S, "Pathogenic_ASVs_Level", sep="_"),".png", sep="")
      sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
  
  Aps_alpha_barplot <- pslist[[i]] %>%
  #tax_glom(taxrank = "Genus") %>%                      #merges species that have the same taxonomy at a certain rank
  transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
  psmelt() #%>%                                         # Melt to long format
  #filter(Abundance > 0.05) %>%                         # Filter out low abundance taxa
  #arrange(Species)                                      # Sort data frame alphabetically by phylum

  Aps_alpha_barplot$Reps <- sub(gg, "", Aps_alpha_barplot$Replicates)  
  Aps_alpha_barplot$ASV <- Aps_alpha_barplot$OTU #sub(".*:", "", Aps_alpha_barplot$OTU)  
  #Aps_alpha_barplot$ASV <- #sub('\\.', ' ', Aps_alpha_barplot$ASV)
  #print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)

  #GroupOfInterestSpecies <- sub(".*:", "", GroupOfInterest) 
  #GroupOfInterestSpecies <-sub('\\.', ' ', GroupOfInterestSpecies)
  
  ######################
  #PathogeneSpecies ASV#
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% GroupOfInterest,] 
  
  ################
  #For Genus glom#
   #Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$Genus %in% GroupOfInterestSpecies,] 
  
  
  #print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
  Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
  #https://gist.github.com/svigneau/05148a7031172c2bc70d
  
  RepOrder <-c( 
    "SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
    
   # if (gg == "GC") {
   #   RepOrder <- GCOrder
   #   } else if (gg == "OE") {
   #   RepOrder <- OEOrder
   #   } else if (gg == "SL") {
   #     RepOrder <- SLOrder }
  
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))

  Aps_alpha_barplot_Pathogens$Labels <- labels
  print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 5))

  df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
              OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
  
  df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
  Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
  Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance, 
                                                                  decreasing = TRUE), ]
  top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
  
  LocOrderSL=c(
  "Medem Grund","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch") 

    microbiomeseq_cols <- function(){
    colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
               "#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
               "#FFFF00",
                grey.colors(1000));return(colours)}
    colours <-   microbiomeseq_cols() 
    RepOrder <-  SLOrder
    
  df <- Aps_alpha_barplot_Pathogens
  p<-ggplot(df, 
            aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) + 
  geom_bar(stat = "identity") +
  facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
  #scale_x_discrete(labels = LocOrder) + 
  atheme2 +
  ylab("Relative Abundance [%] \n") + xlab("Sample") +
  #scale_color_manual(values= col.Palette.species2)+
  #scale_fill_manual(values= col.Palette.species2)
  ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
  scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
  ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance,  aes(label = Labels),
   inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
 
  atheme + 
        theme(strip.text.y = element_text(angle = 0))  +
        theme(axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y.left = element_blank(),
        axis.ticks.y =element_blank()) +
  theme(panel.background = element_rect(fill='transparent'), #transparent panel bg
    plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
    legend.background = element_rect(fill='transparent'), #transparent legend bg
    legend.box.background = element_rect(fill='transparent')) + #transparent legend panel
  theme(axis.title.x.bottom =  element_text(color="grey13"), 
        strip.text = element_text(color = "black", face= "bold"))

  g <- ggplot_gtable(ggplot_build(p))
        stripr <- which(grepl('strip-t', g$layout$name))
        fills <- alpha(col.Palette[[Species]], 0.7)
        k <- 1
        for (iii in stripr) {
        j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
        g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
        k <- k+1}
        
  prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
  AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
  ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
  height = 8)
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}  
## [1] "ps_WGCNA"
##   Labels ReadNum    SampleID                 OTU
## 1  16628   16628 SLSU21BBEB1   ASV15:Verticiella
## 2          16628 SLSU21BBEB1 ASV1105:Pseudomonas
## 3          16628 SLSU21BBEB1 ASV1432:Verticiella
## 4          16628 SLSU21BBEB1   ASV146:Shewanella
## 5          16628 SLSU21BBEB1  ASV1632:Shewanella

#-

3 RNAseq Gill

Individual Co-Expression networks

3.1 Gill RNA Setup

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set

set.seed(123)
Species    <- "SL"
Year       <- "2021"
Season     <- "Summer"
Type       <- "RNA"
Tissue     <- "Gill"
Analysis   <- "WGCNA"
alpha      <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"

#####################
Tissue <- "Gill"
variable <- "Replicates"
#####################

save_name_RNA_Gill <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Gill, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Gill_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Gill$SampleID, type="cmd"), collapse=", ")) 
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

Samples_RNA_Gill_All<-c(
  "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4",  "SLSU21MGEB5", "SLSU21MGEB7",
  "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
  "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6",  "SLSU21SSEB7", "SLSU21SSEB9",
  "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

Samples_RNA_Gill<-c(
  "SLSU21MGEB1", "SLSU21MGEB2",                "SLSU21MGEB4",                "SLSU21MGEB7",
  "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
  "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5",                "SLSU21SSEB7", "SLSU21SSEB9",
  "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

#"SLSU21MGEB5" outlier in sample Clustering
#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill

3.2 DESeq2

Samples_RNA_Gill_List <- list("Samples_RNA_Gill_All" = Samples_RNA_Gill_All, "Samples_RNA_Gill"=Samples_RNA_Gill)

WGCNA_RNA_Gill <- list()
for (Gills in names(Samples_RNA_Gill_List)){

  WGCNA_RNA_Gill[[Gills]] <- list()
  
  Samples <- Samples_RNA_Gill_List[[Gills]]
  
  SAMDF_RNA_Gill           <- SAMDF_RNA_Gill[SAMDF_RNA_Gill$SampleID %in% Samples,]

  SAMDF_RNA_Gill  <-SAMDF_RNA_Gill %>% arrange(factor(rownames(SAMDF_RNA_Gill), levels = Samples))
  rownames(SAMDF_RNA_Gill) <-SAMDF_RNA_Gill$SampleID

#=====================================================================================
# ExperimentalDesign & Filter
#=====================================================================================
  cnt_RNA_Gill    <- cnt_RNA_Gill[,SAMDF_RNA_Gill$SampleID] 

  #ExperimentalDesign
  library(DESeq2)
  dds_RNA_Gill <- DESeqDataSetFromMatrix(countData = cnt_RNA_Gill,
                              colData = SAMDF_RNA_Gill,
                              design = ~ Replicates)
  vst_RNA_Gill <- vst(dds_RNA_Gill, blind = FALSE)

  ##################
  # SampleDist PCAs#
  ##################
  vst_RNA_list <- list("vst_RNA_Gill" =vst_RNA_Gill)
  for (i in names(vst_RNA_list)[grepl("vst", names(vst_RNA_list))]){
    require(plyr)
    require(ggrepel) 
    require(cowplot)
    if (OperatingSystem == "Mac" ) {
       quartz() }
    tryCatch({
    g       <- paste(i) 
    gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  RNA_vst_PCA_Gill <-pcaplotRK3(vst_RNA_list[[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",
                   ellipse = TRUE,     ellipse.prob = 0.5) +
  scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
  scale_color_manual(values=col.Palette$SL) + atheme +
  theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
  theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
  prow <- cowplot::plot_grid(RNA_vst_PCA_Gill, labels = c(""), ncol = 1)
  title <- ggdraw() + draw_label_themeRKwhite(paste(Species,  gg, Type, Gills), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRKwhite(paste("vst-PCA", "with",       ... = 
  length(rownames(vst_RNA_list[[i]])),"genes",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,   
  vjust = 1)
  RNA_vst_PCA_Gill<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
  ggsave(RNA_vst_PCA_Gill, filename = paste(paste(save_name_RNA_Gill, "vst-PCA", Gills, sep="_"),".png", sep=""), path = pathPlots ,
         device='png', dpi=300, width = 7,
  height = 7)
  plot(RNA_vst_PCA_Gill)
  WGCNA_RNA_Gill[[Gills]]$RNA_vst_PCA_Gill <- RNA_vst_PCA_Gill
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }

  #######################
  ### 3.1.2 Clustering###
  #######################

  ########################################
  # Get overview of normalized data#
  # dds <- DESeq(dds)
  # VST <- getVarianceStabilizedData(dds)
  # rv_VST <- rowVars(VST)
  # summary(rv_VST)
  # q75_VST <- quantile( rowVars(VST), .75)  # <= original
  # expr_normalized <- VST[ rv_VST > q75_VST, ]
  # 
  # expr_normalized_df <- data.frame(expr_normalized) %>%
  #   mutate(Gene_id = row.names(expr_normalized)) %>% pivot_longer(-Gene_id)
  # 
  # expr_normalized_df %>% ggplot(., aes(x = name, y = value)) +
  #   geom_violin() +
  #   geom_point() +
  #   theme_bw() +
  #   theme(axis.text.x = element_text( angle = 90)) +
  #   ylim(0, NA) +
  #   labs(
  #     title = "Normalized and 75 quantile Expression",
  #     x = "treatment",y = "normalized expression")
  ####################################################

  #=====================================================================================
  #  Code chunk 3
  #=====================================================================================
  library(WGCNA)
  #rlog-Transformation
  omics_data0 <-data.frame(t(assay(vst_RNA_Gill)))
  a <- length(omics_data0)
  gsg = goodSamplesGenes(omics_data0, verbose = 3);
  gsg$allOK

  #=====================================================================================
  #  Code chunk 4
  #=====================================================================================
  if (!gsg$allOK) {
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) 
     print(paste("Removing genes"));
    if (sum(!gsg$goodSamples)>0) 
     print(paste("Removing samples:"));
    # Remove the offending genes and samples from the data:
    omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
  }
  b <- length(omics_data0)

  print(paste("genes before", a))
  print(paste("genes before", b))
  print(paste("genes before", b-a))

  #=====================================================================================
  #  Code chunk 5
  #=====================================================================================
  sampleTree = hclust(dist(omics_data0), method = "average");
  # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
  # The user should change the dimensions if the window is too large or too small.
  sizeGrWindow(12,9)
  #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
  par(cex = 0.6);
  par(mar = c(0,4,2,0))
  plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, 
     cex.axis = 1.5, cex.main = 2)

  #=====================================================================================
  #  Code chunk 6
  #=====================================================================================
  # Plot a line to show the cut
  abline(h = 100, col = "red");
  # Determine cluster under the line
  clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
  table(clust)
  # clust 1 contains the samples we want to keep.
  recordPlot() -> SampleClusteringTreePlot

  #in Case we would exclude outliers from the tree: 
  #keepSamples = (clust==1)
  #omics_data0 = omics_data0[keepSamples, ]

  #Keep all samples
  omics_data = omics_data0
  nGenes = ncol(omics_data)
  nSamples = nrow(omics_data)

  #=====================================================================================
  #  Code chunk 7
  #=====================================================================================
  datTraits<-SAMDF_RNA_Gill[,traitData]
  datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
  collectGarbage()

  #=====================================================================================
  #  Code chunk 8
  #=====================================================================================
  # Re-cluster samples
  sampleTree2 = hclust(dist(omics_data), method = "average")
  # Convert traits to a color representation: white means low, red means high, grey means missing entry
  traitColors = numbers2colors(datTraits, signed = FALSE)
  # Plot the sample dendrogram and the colors underneath.
  plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits), 
                    main = "Sample dendrogram and trait heatmap")
  recordPlot() -> SampleClusteringTreeTraitsPlot

  #=====================================================================================
  #  Code chunk 9
  #=====================================================================================

  WGCNA_RNA_Gill[[Gills]]$omics_data <- omics_data
  WGCNA_RNA_Gill[[Gills]]$datTraits  <- datTraits
  WGCNA_RNA_Gill[[Gills]]$SAMDF_RNA_Gill  <-  SAMDF_RNA_Gill
  WGCNA_RNA_Gill[[Gills]]$SampleClusteringTreePlot  <- SampleClusteringTreePlot
  WGCNA_RNA_Gill[[Gills]]$SampleClusteringTreeTraitsPlot <- SampleClusteringTreeTraitsPlot
  
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 3333 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 29583"
## [1] "genes before -3333"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 3489 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 29427"
## [1] "genes before -3489"
SAMDF_RNA_Gill <-WGCNA_RNA_Gill$Samples_RNA_Gill$SAMDF_RNA_Gill
omics_data<- WGCNA_RNA_Gill$Samples_RNA_Gill$omics_data
datTraits<- WGCNA_RNA_Gill$Samples_RNA_Gill$datTraits
save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "dataInput", Date, sep="_"), ".RData", sep=""))))

##############
#Summary Plot#
##############
cowplot::plot_grid(WGCNA_RNA_Gill$Samples_RNA_Gill_All$SampleClusteringTreePlot, WGCNA_RNA_Gill$Samples_RNA_Gill$SampleClusteringTreePlot, labels = c("A", "B"), rel_heights = c(1,1), rel_widths =  c(1,0.8), ncol = 2) -> part_1
part_1
cowplot::plot_grid(WGCNA_RNA_Gill$Samples_RNA_Gill_All$RNA_vst_PCA_Gill, WGCNA_RNA_Gill$Samples_RNA_Gill$RNA_vst_PCA_Gill, labels = c("C", "D"), rel_heights = c(1,1), rel_widths =  c(1,1), ncol = 2) -> part_2
part_2
cowplot::plot_grid(part_1, part_2, ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_RNA_Gill, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 12)
part_3

3.3 Network construction

Soft Threshold Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency, based on the criterion of approximate scale-free topology.

The general guidance for WGCNA and hdWGCNA is to pick the lowest soft power threshold that has a Scale Free Topology Model Fit greater than or equal to 0.8, so in this case we would select our soft power threshold as 9. Later on, the ConstructNetwork will automatically select the soft power threshold if the user does not provide one.

3.3.1 Pick Soft Threshold

#=====================================================================================
#  Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
#  Code chunk 2
#=====================================================================================
#from Strand et al, 2021
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
  allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
  sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1520.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1520 of 29427
##    ..working on genes 1521 through 3040 of 29427
##    ..working on genes 3041 through 4560 of 29427
##    ..working on genes 4561 through 6080 of 29427
##    ..working on genes 6081 through 7600 of 29427
##    ..working on genes 7601 through 9120 of 29427
##    ..working on genes 9121 through 10640 of 29427
##    ..working on genes 10641 through 12160 of 29427
##    ..working on genes 12161 through 13680 of 29427
##    ..working on genes 13681 through 15200 of 29427
##    ..working on genes 15201 through 16720 of 29427
##    ..working on genes 16721 through 18240 of 29427
##    ..working on genes 18241 through 19760 of 29427
##    ..working on genes 19761 through 21280 of 29427
##    ..working on genes 21281 through 22800 of 29427
##    ..working on genes 22801 through 24320 of 29427
##    ..working on genes 24321 through 25840 of 29427
##    ..working on genes 25841 through 27360 of 29427
##    ..working on genes 27361 through 28880 of 29427
##    ..working on genes 28881 through 29427 of 29427
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1    0.324 -2.05          0.858 6480.00  6270.000  10100
## 2      2    0.687 -2.20          0.924 2210.00  2000.000   5040
## 3      3    0.788 -2.18          0.942  950.00   784.000   2990
## 4      4    0.815 -2.16          0.947  472.00   351.000   1940
## 5      5    0.843 -2.11          0.961  260.00   175.000   1340
## 6      6    0.853 -2.07          0.968  155.00    99.700    969
## 7      7    0.860 -2.02          0.977   98.60    56.900    720
## 8      8    0.859 -2.00          0.982   65.90    33.700    548
## 9      9    0.864 -1.96          0.988   46.10    20.900    425
## 10    10    0.858 -1.93          0.989   33.50    13.500    334
## 11    12    0.828 -1.84          0.974   19.60     6.120    215
## 12    14    0.783 -1.61          0.886   12.90     2.960    145
## 13    16    0.967 -1.16          0.962    9.36     1.520    103
## 14    18    0.942 -1.16          0.946    7.37     0.827    102
## 15    20    0.897 -1.12          0.931    6.17     0.465    102
  # Plot the results:
  # Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
  # based on the criterion of approximate scale-free topology.
  idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
  if(is.infinite(idx)){
    idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
    if(!is.infinite(idx)){
      st <- sft$fitIndices[idx,1]
      } else{
     idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
      st <- sft$fitIndices[idx,1]
    }
  } else{st <- sft$fitIndices[idx,1]}
  # Plot Scale independence measure and Mean connectivity measure

  # Scale-free topology fit index as a function of the soft-thresholding power
  data.frame(Indices = sft$fitIndices[,1],
           sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>% 
    ggplot() + 
    geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
    geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
    geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
    geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
    ggtitle("Scale independence") +
    xlab("Soft Threshold (power)") +
    ylab("SF Model Fit,signed R^2") +
    xlim(1,20) +
    ylim(-1,1) +
    geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05), 
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.5)-> scale_independence_plot 
  
# Mean connectivity as a function of the soft-thresholding power
  data.frame(Indices = sft$fitIndices[,1],
           meanApprox = sft$fitIndices[,5]) %>% 
    ggplot() + 
    geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
    geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
    xlab("Soft Threshold (power)") +
    ylab("Mean Connectivity") +
    geom_segment(aes(x = st-0.4, 
                   y = sft$fitIndices$mean.k.[idx], 
                   xend = 0, 
                   yend = sft$fitIndices$mean.k.[idx]),
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.4) +
    ggtitle(paste0("Mean connectivity: ", 
                 round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot

  si_mc_plot <- cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"))

  si_mc_plot

  ggsave(si_mc_plot, filename = paste(paste(save_name_RNA_Gill, "soft_thresholding_power", sep="_"),".png", sep=""), path = pathPlots,
         device='png', dpi=300, width = 8, height = 6)

  softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 16"
  softPower <- 6

3.3.2 blockwiseModules

deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.

##################
#blockwiseModules#
##################
softPower = softPower; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
#https://www.biostars.org/p/305714/
#there is a problem with cor form stats and cor from WGCNA, restart R
network = blockwiseModules(omics_data, 
                       power = softPower,
                       networkType = "signed", 
                       TOMType = "signed",
                       corType = "bicor",
                       minModuleSize = 30, #for genes 30
                       reassignThreshold = 0, 
                       deepSplit = 2,
                       mergeCutHeight = 0.25,
                       numericLabels = TRUE, 
                       pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"), 
                       verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ....pre-clustering genes to determine blocks..
##    Projective K-means:
##    ..k-means clustering..
##    ..merging smaller clusters...
## Block sizes:
## gBlocks
##    1    2    3    4    5    6    7 
## 4996 4871 4861 4545 4073 3706 2375 
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file SL_Gill_RNA_Summer_TOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 7 genes from module 1 because their KME is too low.
##      ..removing 13 genes from module 2 because their KME is too low.
##      ..removing 2 genes from module 3 because their KME is too low.
##      ..removing 2 genes from module 4 because their KME is too low.
##  ..Working on block 2 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 2 into file SL_Gill_RNA_Summer_TOM-block.2.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 131 genes from module 1 because their KME is too low.
##      ..removing 20 genes from module 2 because their KME is too low.
##      ..removing 45 genes from module 3 because their KME is too low.
##      ..removing 22 genes from module 4 because their KME is too low.
##      ..removing 28 genes from module 5 because their KME is too low.
##      ..removing 17 genes from module 6 because their KME is too low.
##      ..removing 16 genes from module 7 because their KME is too low.
##      ..removing 11 genes from module 9 because their KME is too low.
##      ..removing 159 genes from module 10 because their KME is too low.
##      ..removing 1 genes from module 12 because their KME is too low.
##      ..removing 1 genes from module 13 because their KME is too low.
##  ..Working on block 3 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 3 into file SL_Gill_RNA_Summer_TOM-block.3.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 153 genes from module 1 because their KME is too low.
##      ..removing 68 genes from module 2 because their KME is too low.
##      ..removing 80 genes from module 3 because their KME is too low.
##      ..removing 30 genes from module 4 because their KME is too low.
##      ..removing 28 genes from module 6 because their KME is too low.
##      ..removing 12 genes from module 7 because their KME is too low.
##  ..Working on block 4 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 4 into file SL_Gill_RNA_Summer_TOM-block.4.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 339 genes from module 1 because their KME is too low.
##      ..removing 99 genes from module 3 because their KME is too low.
##      ..removing 72 genes from module 4 because their KME is too low.
##      ..removing 117 genes from module 5 because their KME is too low.
##      ..removing 273 genes from module 7 because their KME is too low.
##      ..removing 286 genes from module 8 because their KME is too low.
##  ..Working on block 5 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 5 into file SL_Gill_RNA_Summer_TOM-block.5.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 233 genes from module 1 because their KME is too low.
##      ..removing 178 genes from module 2 because their KME is too low.
##      ..removing 414 genes from module 3 because their KME is too low.
##      ..removing 420 genes from module 4 because their KME is too low.
##      ..removing 362 genes from module 5 because their KME is too low.
##      ..removing 23 genes from module 6 because their KME is too low.
##      ..removing 216 genes from module 8 because their KME is too low.
##      ..removing 19 genes from module 9 because their KME is too low.
##  ..Working on block 6 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 6 into file SL_Gill_RNA_Summer_TOM-block.6.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 442 genes from module 1 because their KME is too low.
##      ..removing 119 genes from module 2 because their KME is too low.
##      ..removing 79 genes from module 3 because their KME is too low.
##      ..removing 59 genes from module 4 because their KME is too low.
##      ..removing 327 genes from module 5 because their KME is too low.
##      ..removing 348 genes from module 6 because their KME is too low.
##      ..removing 12 genes from module 7 because their KME is too low.
##      ..removing 1 genes from module 8 because their KME is too low.
##  ..Working on block 7 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 7 into file SL_Gill_RNA_Summer_TOM-block.7.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 245 genes from module 4 because their KME is too low.
##      ..removing 238 genes from module 5 because their KME is too low.
##      ..removing 37 genes from module 6 because their KME is too low.
##      ..removing 47 genes from module 7 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
#                           power = st, 
#                           networkType = "signed", 
#                           TOMType = "signed",
#                           corType = "bicor",
#                           maxPOutliers = 0.05,
#                           deepSplit = 4, # Default 2
#                           minModuleSize = 2, # 30
#                           minCoreKME = 0.5,      # Default 0.5
#                           minCoreKMESize = 2,    # Default minModuleSize/3,
#                           minKMEtoStay = 0.5,    # Default 0.3
#                           reassignThreshold = 0, # Default 1e-6
#                           mergeCutHeight = 0.4,  # Default 0.15
#                           pamStage = FALSE, 
#                           pamRespectsDendro = TRUE,
#                           replaceMissingAdjacencies = TRUE,
#                           numericLabels = TRUE,
#                           saveTOMs = TRUE,
#                           saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
#                           verbose = 3)

###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs;
geneTree = network$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
     file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))

RNA_Gill_WGCNA <- list("RNA_Gill_omics_data" = omics_data, 
                       "RNA_Gill_datTraits"=datTraits,
                       "RNA_Gill_MEs"=MEs,
                       "RNA_Gill_moduleLabels"=moduleLabels,
                       "RNA_Gill_moduleColors"=moduleColors,
                       "RNA_Gill_geneTree"=geneTree)

saveRDS(RNA_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List", Date, sep="_"), ".rds", sep=""))))

3.3.3 Module & Network Visualization

#####################
#plotDendroAndColors#
#####################

##############################
#Number of ASV in each module#
##############################
as.data.frame(table(RNA_Gill_WGCNA$RNA_Gill_moduleLabels)) %>% 
  dplyr::rename(Module = Var1, Size = Freq) %>%
  dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>% 
  ggplot(aes(x = Module, y = Size, fill = Module)) +
  geom_col(color =  "#000000") +
  ggtitle("Number of genes in each module") +
  theme(legend.position = "none") + 
  scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
  geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
  ylim(0, max(module_size$Size)*1.1) +
  theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
  coord_flip() -> RNAModuleSizePlot

RNAModuleSizePlot

ggsave(RNAModuleSizePlot, filename = paste(paste(save_name_RNA_Gill, "RNAModulePlot", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]

MEs_R_noME0[upper.tri(MEs_R_noME0)] %>% 
  as.data.frame() %>% 
  dplyr::rename("correlation" = ".") %>% 
  ggplot(aes(x=correlation)) + 
  geom_histogram(bins = 20) +
  #geom_density() + 
  xlim(-1, 1) +
  ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
MEs_R_density

pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
                   silent = T, 
                   breaks = seq(-1,1,length.out = 101),
                   treeheight_row = 5, 
                   treeheight_col = 5,
                   main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
                   labels_row = paste0(prefix, rownames(MEs_R)),
                   labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr
MEs_R_Corr

ggsave(MEs_R_Corr, filename = paste(paste(save_name_RNA_Gill, "MEs_R_Corr", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen


#######################
#Network-Visualization#
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.

# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

#############################
# Correlation within modules# from Strand et al, 2021
#############################
corr_within_module <- function(omics_data, network, module_x = 1){
  idx.omics_data <- which(network$colors == module_x)
  idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
  kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
  kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
  h <- as.numeric(sub("ME","", m))
  data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>% 
    ggplot() + 
    geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) + 
    xlim(-1, 1) +
    xlab("ASV correlation")+
    ggtitle(paste0(prefix,m)) -> da_plot
  ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]

cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot

ggsave(density_all_plot, filename = paste(paste(save_name_RNA_Gill, "WithinModuleCorrelation", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 20)


##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, RNAModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,1)) -> part_2
ggsave(part_2, filename = paste(paste(save_name_RNA_Gill, "RNA_Gill_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,
  height = 8)
part_2

3.4 Module-Trait-Correlation Heatmap

###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(RNA_Gill_WGCNA$RNA_Gill_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .)  %>% paste("RNA",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)

# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs) 

mME = mat %>%
  pivot_longer(-traits) %>%
  mutate(#name = gsub("ME", "", name),
    name = factor(name, levels = module_order))

textMatrixLong <-  as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
  pivot_longer(-traits) %>%
  mutate(
    #name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)

textMatrixLong2 <-  as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
  pivot_longer(-traits) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)

## add gene counts per module
 genesmod<- as.data.frame(moduleLabels)
 genesmod$genes <- rownames(genesmod)
 genesmod$Modules <- paste("RNA",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
 
ModCount <- as.data.frame(genesmod %>% 
  dplyr::group_by(Modules) %>% 
  dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]


HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "steelblue1",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

ggsave(A, filename = paste(paste(save_name_RNA_Gill, "WGCNA-ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

      
 
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",         
"Salinity",    
"SecchiDepth"
)) ~ "Abiotics", 
(traits %in% c(
"FultonK", 
"Length",
"Weight",
"StomachContent",
"HSI",              
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")
 
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
  geom_tile() +
    scale_fill_gradientn(
      colours = c("steelblue1", "white", "red"),
      limit = c(-1,1), 
      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
  scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
  facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

ggsave(A, filename = paste(paste(save_name_RNA_Gill, "WGCNA-ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 13, height = 7) 
      

##################
#Specific Modules#
##################
 #  Module <- "steelblue"
 #  HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
 #  geom_tile() +
 #    scale_fill_gradientn(
 #      colours = c("steelblue1", "white", "red"),
 #      limit = c(-1,1),
 #      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
 #  # scale_fill_gradient2(
 #  #   low = "steelblue1",
 #  #   high = "red",
 #  #   mid = "white",
 #  #   midpoint = 0,
 #  #   limit = c(-1,1))
 #  #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
 #  theme(axis.text.x = element_text(angle=90)) +
 #  labs(title = "", y = "Modules", fill="corr") +
 #  geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2),
 #                      size=3, colour="grey20") +
 #  geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2),
 #                      colour="grey20", size=3) +
 #   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
 #      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
 #      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 # A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
 #      plot(A)
 #      ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots,
 #             device='png', dpi=300, width = 2.8,height = 7)

3.6 Dataframe Module-Genes

RNA_Gill_WGCNA<- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List", Date, sep="_"), ".rds", sep=""))))

InterestingComparison <- RNA_Gill_WGCNA$RNA_Gill_MEs
Module_Genes_list <- list()
for (MODULE in names(InterestingComparison)) {
  a <- length(Module_Genes_list)
  ExpressionSet <- RNA_Gill_WGCNA$RNA_Gill_omics_data
  moduleLabels  <- RNA_Gill_WGCNA$RNA_Gill_moduleLabels
  Annotations   <- SLUCGeneManual
  
  genes_of_interest <- as.data.frame(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))]))
  #print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))
  colnames(genes_of_interest) <- "rowname"
  print(paste("Module genes before Conversion to EntrezIDs"))
  print(paste(MODULE, dim(genes_of_interest)[1]))
  
  ########################################
  #Add annotated Gene-Names to omics data#
  ########################################
  #print(paste("Lost by double dot to dot conversion somwhere in the pipeline"))
  #print(genes_of_interest$rowname[genes_of_interest$rowname %in% SLUCGeneManual$rowname == FALSE])
  
  AnnotationDbi_genes  <-dplyr::left_join(genes_of_interest, SLUCGeneManual)
  AnnotationDbi_genes$GeneSymbolHS <- AnnotationDbi_genes$Human_SYMBOL_Manual
  AnnotationDbi_genes2  <- as.data.frame(AnnotationDbi_genes$Human_SYMBOL_Manual)
  colnames(AnnotationDbi_genes2) <- "GeneSymbolHS"
  library(org.Hs.eg.db)
  HS<-AnnotationDbi::select(org.Hs.eg.db, AnnotationDbi_genes2$GeneSymbolHS, "ENTREZID","SYMBOL") #Get EntrezIDs from Symbol
  #CHECK!#
  subset(HS, is.na(HS$ENTREZID))
  dim(subset(HS, is.na(HS$ENTREZID)))
  AnnotationDbi_genes2 <- dplyr::left_join(AnnotationDbi_genes2, HS, by=c("GeneSymbolHS" = "SYMBOL"))
  dedup_ids = AnnotationDbi_genes2[!duplicated(AnnotationDbi_genes2[c("ENTREZID")]),]
  dedup_ids <- dedup_ids[!is.na(dedup_ids$ENTREZID),]
  print(paste("Module genes After Conversion to EntrezIDs"))
  print(paste(MODULE, dim(dedup_ids)[1]))
  #print(head(dedup_ids))
  A<- dplyr::left_join(AnnotationDbi_genes[c("Geneid", "GeneSymbolHS")], dedup_ids)
  A <- A[complete.cases(A$Geneid), ]
  
  Module_Genes_list[[a+1]] <- A
  names(Module_Genes_list)[[a+1]] <- paste("GeneAnno",MODULE, sep="_")
}
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME2 2960"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME2 2184"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME11 639"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME11 459"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME7 764"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME7 548"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME10 728"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME10 605"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME20 246"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME20 191"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME25 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME25 103"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME31 76"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME31 52"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME16 388"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME16 330"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME38 38"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME38 30"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME17 297"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME17 262"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME5 888"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME5 774"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME4 936"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME4 845"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME15 413"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME15 360"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME21 223"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME21 180"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME26 104"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME26 83"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME37 41"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME37 38"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME36 48"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME36 29"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME9 743"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME9 547"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME14 429"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME14 324"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME27 95"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME27 64"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME12 541"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME12 435"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME13 499"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME13 416"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME22 215"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME22 183"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME28 94"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME28 50"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME32 72"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME32 49"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME29 87"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME29 51"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME34 68"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME34 50"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME30 84"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME30 60"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME35 55"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME35 31"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME3 1675"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME3 1352"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME23 174"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME23 137"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME1 3848"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME1 2814"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME6 859"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME6 733"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME8 749"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME8 546"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME24 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME24 86"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME18 296"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME18 216"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME19 251"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME19 196"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME33 70"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME33 51"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME0 9484"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME0 3864"
Gene_counts <- list()
for (i in names(Module_Genes_list)) {
  a <- length(Gene_counts)
  g <- sub("GeneAnno_ME", "", i)
  A <- length(unique(na.omit(Module_Genes_list[[i]]$ENTREZID)))
  B <- length(na.omit(Module_Genes_list[[i]]$Geneid))
  C <- table(RNA_Gill_WGCNA$RNA_Gill_moduleLabels == g)[2]
  
  module_summary <- data.frame(Module = i, Module_Size = C, EnrezID = A)
  Gene_counts[[a+1]] <- module_summary}
print(do.call(rbind, Gene_counts))
##               Module Module_Size EnrezID
## TRUE    GeneAnno_ME2        2960    2184
## TRUE1  GeneAnno_ME11         639     459
## TRUE2   GeneAnno_ME7         764     548
## TRUE3  GeneAnno_ME10         728     605
## TRUE4  GeneAnno_ME20         246     191
## TRUE5  GeneAnno_ME25         125     103
## TRUE6  GeneAnno_ME31          76      52
## TRUE7  GeneAnno_ME16         388     330
## TRUE8  GeneAnno_ME38          38      30
## TRUE9  GeneAnno_ME17         297     262
## TRUE10  GeneAnno_ME5         888     774
## TRUE11  GeneAnno_ME4         936     845
## TRUE12 GeneAnno_ME15         413     360
## TRUE13 GeneAnno_ME21         223     180
## TRUE14 GeneAnno_ME26         104      83
## TRUE15 GeneAnno_ME37          41      38
## TRUE16 GeneAnno_ME36          48      29
## TRUE17  GeneAnno_ME9         743     547
## TRUE18 GeneAnno_ME14         429     324
## TRUE19 GeneAnno_ME27          95      64
## TRUE20 GeneAnno_ME12         541     435
## TRUE21 GeneAnno_ME13         499     416
## TRUE22 GeneAnno_ME22         215     183
## TRUE23 GeneAnno_ME28          94      50
## TRUE24 GeneAnno_ME32          72      49
## TRUE25 GeneAnno_ME29          87      51
## TRUE26 GeneAnno_ME34          68      50
## TRUE27 GeneAnno_ME30          84      60
## TRUE28 GeneAnno_ME35          55      31
## TRUE29  GeneAnno_ME3        1675    1352
## TRUE30 GeneAnno_ME23         174     137
## TRUE31  GeneAnno_ME1        3848    2814
## TRUE32  GeneAnno_ME6         859     733
## TRUE33  GeneAnno_ME8         749     546
## TRUE34 GeneAnno_ME24         125      86
## TRUE35 GeneAnno_ME18         296     216
## TRUE36 GeneAnno_ME19         251     196
## TRUE37 GeneAnno_ME33          70      51
## TRUE38  GeneAnno_ME0        9484    3864
RNA_Gill_WGCNA$RNA_Gill_GeneAnno <- Module_Genes_list


saveRDS(RNA_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List_3", Date, sep="_"), ".rds", sep=""))))

-

4 RNAseq Liver

Individual Co-Expression networks

4.1 Liver RNA Setup

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set

set.seed(123)
Species    <- "SL"
Year       <- "2021"
Season     <- "Summer"
Type       <- "RNA"
Tissue     <- "Liver"
Analysis   <- "WGCNA"
alpha      <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"

#####################
Tissue <-   "Liver"
variable <- "Replicates"
#####################

save_name_RNA_Liver <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Liver, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Liver_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Liver$SampleID, type="cmd"), collapse=", ")) 
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

Samples_RNA_Liver_All<-c(
  "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4",  "SLSU21MGEB5", "SLSU21MGEB7",
  "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
  "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6",  "SLSU21SSEB7", "SLSU21SSEB9",
  "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

Samples_RNA_Liver<-c(
  "SLSU21MGEB1", "SLSU21MGEB2",                "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
  "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
  "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5",                "SLSU21SSEB7", "SLSU21SSEB9",
  "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")

#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill

4.2 DESeq2

Samples_RNA_Liver_List <- list("Samples_RNA_Liver_All" = Samples_RNA_Liver_All, "Samples_RNA_Liver"=Samples_RNA_Liver)

WGCNA_RNA_Liver <- list()
for (Livers in names(Samples_RNA_Liver_List)){

  WGCNA_RNA_Liver[[Livers]] <- list()
  
  Samples <- Samples_RNA_Liver_List[[Livers]]
  
  SAMDF_RNA_Liver           <- SAMDF_RNA_Liver[SAMDF_RNA_Liver$SampleID %in% Samples,]

  SAMDF_RNA_Liver  <-SAMDF_RNA_Liver %>% arrange(factor(rownames(SAMDF_RNA_Liver), levels = Samples))
  rownames(SAMDF_RNA_Liver) <-SAMDF_RNA_Liver$SampleID

#=====================================================================================
# ExperimentalDesign & Filter
#=====================================================================================
  cnt_RNA_Liver    <- cnt_RNA_Liver[,SAMDF_RNA_Liver$SampleID] 

  #ExperimentalDesign
  library(DESeq2)
  dds_RNA_Liver <- DESeqDataSetFromMatrix(countData = cnt_RNA_Liver,
                              colData = SAMDF_RNA_Liver,
                              design = ~ Replicates)
  vst_RNA_Liver <- vst(dds_RNA_Liver, blind = FALSE)

  ##################
  # SampleDist PCAs#
  ##################
  vst_RNA_list <- list("vst_RNA_Liver" =vst_RNA_Liver)
  for (i in names(vst_RNA_list)[grepl("vst", names(vst_RNA_list))]){
    require(plyr)
    require(ggrepel) 
    require(cowplot)
    if (OperatingSystem == "Mac" ) {
       quartz() }
    tryCatch({
    g       <- paste(i) 
    gg          <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
  RNA_vst_PCA_Liver <-pcaplotRK3(vst_RNA_list[[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",
                   ellipse = TRUE,     ellipse.prob = 0.5) +
  scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
  scale_color_manual(values=col.Palette$SL) + atheme +
  theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
  theme(
        panel.grid.major = element_line(colour = "grey50"), 
        panel.grid.minor = element_line(colour = "grey50"))
  prow <- cowplot::plot_grid(RNA_vst_PCA_Liver, labels = c(""), ncol = 1)
  title <- ggdraw() + draw_label_themeRKwhite(paste(Species,  gg, Type, Livers), element = "plot.title",x = 0.05, hjust = 0,    vjust = 1)
  subtitle <- ggdraw() + draw_label_themeRKwhite(paste("vst-PCA", "with",       ... = 
  length(rownames(vst_RNA_list[[i]])),"genes",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,   
  vjust = 1)
  RNA_vst_PCA_Liver<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
  ggsave(RNA_vst_PCA_Liver, filename = paste(paste(save_name_RNA_Liver, "vst-PCA", Livers, sep="_"),".png", sep=""), path = pathPlots ,
         device='png', dpi=300, width = 7,
  height = 7)
  plot(RNA_vst_PCA_Liver)
  WGCNA_RNA_Liver[[Livers]]$RNA_vst_PCA_Liver <- RNA_vst_PCA_Liver
  }, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
  }

  #######################
  ### 3.1.2 Clustering###
  #######################

  ########################################
  # Get overview of normalized data#
  # dds <- DESeq(dds)
  # VST <- getVarianceStabilizedData(dds)
  # rv_VST <- rowVars(VST)
  # summary(rv_VST)
  # q75_VST <- quantile( rowVars(VST), .75)  # <= original
  # expr_normalized <- VST[ rv_VST > q75_VST, ]
  # 
  # expr_normalized_df <- data.frame(expr_normalized) %>%
  #   mutate(Gene_id = row.names(expr_normalized)) %>% pivot_longer(-Gene_id)
  # 
  # expr_normalized_df %>% ggplot(., aes(x = name, y = value)) +
  #   geom_violin() +
  #   geom_point() +
  #   theme_bw() +
  #   theme(axis.text.x = element_text( angle = 90)) +
  #   ylim(0, NA) +
  #   labs(
  #     title = "Normalized and 75 quantile Expression",
  #     x = "treatment",y = "normalized expression")
  ####################################################

  #=====================================================================================
  #  Code chunk 3
  #=====================================================================================
  library(WGCNA)
  #rlog-Transformation
  omics_data0 <-data.frame(t(assay(vst_RNA_Liver)))
  a <- length(omics_data0)
  gsg = goodSamplesGenes(omics_data0, verbose = 3);
  gsg$allOK

  #=====================================================================================
  #  Code chunk 4
  #=====================================================================================
  if (!gsg$allOK) {
    # Optionally, print the gene and sample names that were removed:
    if (sum(!gsg$goodGenes)>0) 
     print(paste("Removing genes"));
    if (sum(!gsg$goodSamples)>0) 
     print(paste("Removing samples:"));
    # Remove the offending genes and samples from the data:
    omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
  }
  b <- length(omics_data0)

  print(paste("genes before", a))
  print(paste("genes before", b))
  print(paste("genes before", b-a))

  #=====================================================================================
  #  Code chunk 5
  #=====================================================================================
  sampleTree = hclust(dist(omics_data0), method = "average");
  # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
  # The user should change the dimensions if the window is too large or too small.
  sizeGrWindow(12,9)
  #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
  par(cex = 0.6);
  par(mar = c(0,4,2,0))
  plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, 
     cex.axis = 1.5, cex.main = 2)

  #=====================================================================================
  #  Code chunk 6
  #=====================================================================================
  # Plot a line to show the cut
  abline(h = 100, col = "red");
  # Determine cluster under the line
  clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
  table(clust)
  # clust 1 contains the samples we want to keep.
  recordPlot() -> SampleClusteringTreePlot

  #in Case we would exclude outliers from the tree: 
  #keepSamples = (clust==1)
  #omics_data0 = omics_data0[keepSamples, ]


  #Keep all samples
  omics_data = omics_data0
  nGenes = ncol(omics_data)
  nSamples = nrow(omics_data)

  #=====================================================================================
  #  Code chunk 7
  #=====================================================================================

  datTraits<-SAMDF_RNA_Liver[,traitData]
  datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
  collectGarbage()

  #=====================================================================================
  #  Code chunk 8
  #=====================================================================================
  # Re-cluster samples
  sampleTree2 = hclust(dist(omics_data), method = "average")
  # Convert traits to a color representation: white means low, red means high, grey means missing entry
  traitColors = numbers2colors(datTraits, signed = FALSE)
  # Plot the sample dendrogram and the colors underneath.
  plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits), 
                    main = "Sample dendrogram and trait heatmap")
  recordPlot() -> SampleClusteringTreeTraitsPlot

  #=====================================================================================
  #  Code chunk 9
  #=====================================================================================

  WGCNA_RNA_Liver[[Livers]]$omics_data <- omics_data
  WGCNA_RNA_Liver[[Livers]]$datTraits  <- datTraits
  WGCNA_RNA_Gill[[Livers]]$SAMDF_RNA_Liver  <-  SAMDF_RNA_Liver
  WGCNA_RNA_Liver[[Livers]]$SampleClusteringTreePlot  <- SampleClusteringTreePlot
  WGCNA_RNA_Liver[[Livers]]$SampleClusteringTreeTraitsPlot <- SampleClusteringTreeTraitsPlot
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 4702 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 28214"
## [1] "genes before -4702"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
##  Flagging genes and samples with too many missing values...
##   ..step 1
##   ..Excluding 4850 genes from the calculation due to too many missing samples or zero variance.
##   ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 28066"
## [1] "genes before -4850"
SAMDF_RNA_Liver <-WGCNA_RNA_Liver$Samples_RNA_Liver$SAMDF_RNA_Liver
omics_data<- WGCNA_RNA_Liver$Samples_RNA_Liver$omics_data
datTraits<- WGCNA_RNA_Liver$Samples_RNA_Liver$datTraits

save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "dataInput", Date, sep="_"), ".RData", sep=""))))

##############
#Summary Plot#
##############
cowplot::plot_grid(WGCNA_RNA_Liver$Samples_RNA_Liver_All$SampleClusteringTreePlot, WGCNA_RNA_Liver$Samples_RNA_Liver$SampleClusteringTreePlot, labels = c("A", "B"), rel_heights = c(1,1), rel_widths =  c(1,0.8), ncol = 2) -> part_1
part_1
cowplot::plot_grid(WGCNA_RNA_Liver$Samples_RNA_Liver_All$RNA_vst_PCA_Liver, WGCNA_RNA_Liver$Samples_RNA_Liver$RNA_vst_PCA_Liver, labels = c("C", "D"), rel_heights = c(1,1), rel_widths =  c(1,1), ncol = 2) -> part_2
part_2
cowplot::plot_grid(part_1, part_2, ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_RNA_Liver, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 12)
part_3

4.3 Network construction

Soft Threshold Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency, based on the criterion of approximate scale-free topology.

The general guidance for WGCNA and hdWGCNA is to pick the lowest soft power threshold that has a Scale Free Topology Model Fit greater than or equal to 0.8, so in this case we would select our soft power threshold as 9. Later on, the ConstructNetwork will automatically select the soft power threshold if the user does not provide one.

4.3.1 Pick Soft Threshold

#=====================================================================================
#  Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
#  Code chunk 2
#=====================================================================================
#from Strand et al, 2021
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
  allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
  sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1594.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 1594 of 28066
##    ..working on genes 1595 through 3188 of 28066
##    ..working on genes 3189 through 4782 of 28066
##    ..working on genes 4783 through 6376 of 28066
##    ..working on genes 6377 through 7970 of 28066
##    ..working on genes 7971 through 9564 of 28066
##    ..working on genes 9565 through 11158 of 28066
##    ..working on genes 11159 through 12752 of 28066
##    ..working on genes 12753 through 14346 of 28066
##    ..working on genes 14347 through 15940 of 28066
##    ..working on genes 15941 through 17534 of 28066
##    ..working on genes 17535 through 19128 of 28066
##    ..working on genes 19129 through 20722 of 28066
##    ..working on genes 20723 through 22316 of 28066
##    ..working on genes 22317 through 23910 of 28066
##    ..working on genes 23911 through 25504 of 28066
##    ..working on genes 25505 through 27098 of 28066
##    ..working on genes 27099 through 28066 of 28066
##    Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1      1    0.282 -1.89          0.787 5790.00  5550.000   8790
## 2      2    0.792 -2.20          0.927 1890.00  1670.000   4240
## 3      3    0.889 -2.15          0.950  789.00   625.000   2520
## 4      4    0.882 -2.11          0.936  389.00   280.000   1700
## 5      5    0.866 -2.08          0.926  216.00   143.000   1230
## 6      6    0.858 -2.06          0.928  132.00    77.500    931
## 7      7    0.840 -2.04          0.927   86.40    44.200    730
## 8      8    0.813 -2.05          0.928   60.20    26.900    587
## 9      9    0.804 -2.01          0.935   44.10    16.900    481
## 10    10    0.786 -1.98          0.935   33.70    11.000    401
## 11    12    0.749 -1.83          0.889   21.90     4.950    288
## 12    14    0.774 -1.51          0.770   15.90     2.440    215
## 13    16    0.304 -2.43          0.237   12.60     1.280    207
## 14    18    0.311 -3.06          0.255   10.70     0.700    206
## 15    20    0.307 -3.33          0.223    9.42     0.398    206
  # Plot the results:
  # Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
  # based on the criterion of approximate scale-free topology.
  idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
  if(is.infinite(idx)){
    idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
    if(!is.infinite(idx)){
      st <- sft$fitIndices[idx,1]
      } else{
     idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
      st <- sft$fitIndices[idx,1]
    }
  } else{st <- sft$fitIndices[idx,1]}
  # Plot Scale independence measure and Mean connectivity measure

  # Scale-free topology fit index as a function of the soft-thresholding power
  data.frame(Indices = sft$fitIndices[,1],
           sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>% 
    ggplot() + 
    geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
    geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
    geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
    geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
    ggtitle("Scale independence") +
    xlab("Soft Threshold (power)") +
    ylab("SF Model Fit,signed R^2") +
    xlim(1,20) +
    ylim(-1,1) +
    geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05), 
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.5)-> scale_independence_plot 
  
# Mean connectivity as a function of the soft-thresholding power
  data.frame(Indices = sft$fitIndices[,1],
           meanApprox = sft$fitIndices[,5]) %>% 
    ggplot() + 
    geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
    geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
    xlab("Soft Threshold (power)") +
    ylab("Mean Connectivity") +
    geom_segment(aes(x = st-0.4, 
                   y = sft$fitIndices$mean.k.[idx], 
                   xend = 0, 
                   yend = sft$fitIndices$mean.k.[idx]),
               arrow = arrow(length = unit(0.2,"cm")), 
               size = 0.4) +
    ggtitle(paste0("Mean connectivity: ", 
                 round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot

  si_mc_plot <- cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"))

  si_mc_plot

  ggsave(si_mc_plot, filename = paste(paste(save_name_RNA_Liver, "soft_thresholding_power", sep="_"),".png", sep=""), path = pathPlots,
         device='png', dpi=300, width = 8, height = 6)

softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 3"
softPower <- 3

4.3.2 blockwiseModules

deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.

##################
#blockwiseModules#
##################
softPower = softPower; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 3"
#https://www.biostars.org/p/305714/
#there is a problem with cor form stats and cor from WGCNA, restart R
network = blockwiseModules(omics_data, 
                       power = softPower,
                       networkType = "signed", 
                       TOMType = "signed",
                       corType = "bicor",
                       minModuleSize = 30, #for genes 30
                       reassignThreshold = 0, 
                       deepSplit = 2,
                       mergeCutHeight = 0.25,
                       numericLabels = TRUE, 
                       pamRespectsDendro = FALSE,
                       saveTOMs = TRUE,
                       saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"), 
                       verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ....pre-clustering genes to determine blocks..
##    Projective K-means:
##    ..k-means clustering..
##    ..merging smaller clusters...
## Block sizes:
## gBlocks
##    1    2    3    4    5    6 
## 4988 4976 4645 4541 4521 4395 
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file SL_Liver_RNA_Summer_TOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 804 genes from module 1 because their KME is too low.
##      ..removing 112 genes from module 2 because their KME is too low.
##      ..removing 24 genes from module 3 because their KME is too low.
##  ..Working on block 2 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 2 into file SL_Liver_RNA_Summer_TOM-block.2.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 144 genes from module 1 because their KME is too low.
##      ..removing 102 genes from module 2 because their KME is too low.
##  ..Working on block 3 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 3 into file SL_Liver_RNA_Summer_TOM-block.3.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 88 genes from module 1 because their KME is too low.
##      ..removing 595 genes from module 2 because their KME is too low.
##      ..removing 407 genes from module 3 because their KME is too low.
##      ..removing 36 genes from module 4 because their KME is too low.
##      ..removing 245 genes from module 5 because their KME is too low.
##      ..removing 5 genes from module 9 because their KME is too low.
##      ..removing 1 genes from module 11 because their KME is too low.
##      ..removing 68 genes from module 12 because their KME is too low.
##      ..removing 24 genes from module 13 because their KME is too low.
##  ..Working on block 4 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 4 into file SL_Liver_RNA_Summer_TOM-block.4.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 78 genes from module 1 because their KME is too low.
##      ..removing 621 genes from module 2 because their KME is too low.
##      ..removing 502 genes from module 3 because their KME is too low.
##      ..removing 432 genes from module 4 because their KME is too low.
##      ..removing 323 genes from module 5 because their KME is too low.
##      ..removing 228 genes from module 6 because their KME is too low.
##      ..removing 264 genes from module 7 because their KME is too low.
##      ..removing 3 genes from module 8 because their KME is too low.
##      ..removing 1 genes from module 9 because their KME is too low.
##      ..removing 1 genes from module 11 because their KME is too low.
##  ..Working on block 5 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 5 into file SL_Liver_RNA_Summer_TOM-block.5.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 580 genes from module 1 because their KME is too low.
##      ..removing 468 genes from module 2 because their KME is too low.
##      ..removing 437 genes from module 3 because their KME is too low.
##      ..removing 70 genes from module 4 because their KME is too low.
##      ..removing 372 genes from module 5 because their KME is too low.
##      ..removing 300 genes from module 6 because their KME is too low.
##      ..removing 245 genes from module 9 because their KME is too low.
##      ..removing 18 genes from module 10 because their KME is too low.
##      ..removing 2 genes from module 11 because their KME is too low.
##  ..Working on block 6 .
##     TOM calculation: adjacency..
##     ..will use 8 parallel threads.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 6 into file SL_Liver_RNA_Summer_TOM-block.6.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##      ..removing 53 genes from module 1 because their KME is too low.
##      ..removing 57 genes from module 2 because their KME is too low.
##      ..removing 10 genes from module 3 because their KME is too low.
##      ..removing 372 genes from module 4 because their KME is too low.
##      ..removing 41 genes from module 8 because their KME is too low.
##      ..removing 88 genes from module 9 because their KME is too low.
##      ..removing 2 genes from module 10 because their KME is too low.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
#                           power = st, 
#                           networkType = "signed", 
#                           TOMType = "signed",
#                           corType = "bicor",
#                           maxPOutliers = 0.05,
#                           deepSplit = 4, # Default 2
#                           minModuleSize = 2, # 30
#                           minCoreKME = 0.5,      # Default 0.5
#                           minCoreKMESize = 2,    # Default minModuleSize/3,
#                           minKMEtoStay = 0.5,    # Default 0.3
#                           reassignThreshold = 0, # Default 1e-6
#                           mergeCutHeight = 0.4,  # Default 0.15
#                           pamStage = FALSE, 
#                           pamRespectsDendro = TRUE,
#                           replaceMissingAdjacencies = TRUE,
#                           numericLabels = TRUE,
#                           saveTOMs = TRUE,
#                           saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
#                           verbose = 3)

###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs;
geneTree = network$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
     file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))

lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))

RNA_Liver_WGCNA <- list("RNA_Liver_omics_data" = omics_data, 
                       "RNA_Liver_datTraits"=datTraits,
                       "RNA_Liver_MEs"=MEs,
                       "RNA_Liver_moduleLabels"=moduleLabels,
                       "RNA_Liver_moduleColors"=moduleColors,
                       "RNA_Liver_geneTree"=geneTree)

saveRDS(RNA_Liver_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))

RNA_Liver_WGCNA <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))

4.3.3 Module & Network Visualization

##############################
#Number of ASV in each module#
##############################
as.data.frame(table(RNA_Liver_WGCNA$RNA_Liver_moduleLabels)) %>% 
  dplyr::rename(Module = Var1, Size = Freq) %>%
  dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>% 
  ggplot(aes(x = Module, y = Size, fill = Module)) +
  geom_col(color =  "#000000") +
  ggtitle("Number of genes in each module") +
  theme(legend.position = "none") + 
  scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
  geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
  ylim(0, max(module_size$Size)*1.1) +
  theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
  coord_flip() -> RNAModuleSizePlot

RNAModuleSizePlot

ggsave(RNAModuleSizePlot, filename = paste(paste(save_name_RNA_Liver, "RNAModulePlot", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]

MEs_R_noME0[upper.tri(MEs_R_noME0)] %>% 
  as.data.frame() %>% 
  dplyr::rename("correlation" = ".") %>% 
  ggplot(aes(x=correlation)) + 
  geom_histogram(bins = 20) +
  #geom_density() + 
  xlim(-1, 1) +
  ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
MEs_R_density

pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
                   silent = T, 
                   breaks = seq(-1,1,length.out = 101),
                   treeheight_row = 5, 
                   treeheight_col = 5,
                   main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
                   labels_row = paste0(prefix, rownames(MEs_R)),
                   labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr

ggsave(MEs_R_Corr, filename = paste(paste(save_name_RNA_Liver, "MEs_R_Corr", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen



#######################
#Network-Visualization# Takes to long for knitting
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.

# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

#############################
# Correlation within modules# from Strand et al, 2021
#############################

corr_within_module <- function(omics_data, network, module_x = 1){
  idx.omics_data <- which(network$colors == module_x)
  idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
  kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
  kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
  h <- as.numeric(sub("ME","", m))
  data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>% 
    ggplot() + 
    geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) + 
    xlim(-1, 1) +
    xlab("ASV correlation")+
    ggtitle(paste0(prefix,m)) -> da_plot
  ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]

cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot

ggsave(density_all_plot, filename = paste(paste(save_name_RNA_Liver, "WithinModuleCorrelation", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 20)


##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, RNAModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,1)) -> part_2
ggsave(part_2, filename = paste(paste(save_name_RNA_Liver, "RNA_Liver_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,
  height = 8)
part_2

4.4 Module-Trait-Correlation Heatmap

###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(RNA_Liver_WGCNA$RNA_Liver_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .)  %>% paste("RNA",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)

# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs) 

mME = mat %>%
  pivot_longer(-traits) %>%
  mutate(#name = gsub("ME", "", name),
    name = factor(name, levels = module_order))

textMatrixLong <-  as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
  pivot_longer(-traits) %>%
  mutate(
    #name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)

textMatrixLong2 <-  as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
  pivot_longer(-traits) %>%
  mutate(
    name = gsub("ME", "", name),
    name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)

## add gene counts per module
 genesmod<- as.data.frame(moduleLabels)
 genesmod$genes <- rownames(genesmod)
 genesmod$Modules <- paste("RNA",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
 
ModCount <- as.data.frame(genesmod %>% 
  dplyr::group_by(Modules) %>% 
  dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]


HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "steelblue1",
    high = "red",
    mid = "white",
    midpoint = 0,
    limit = c(-1,1)) +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

ggsave(A, filename = paste(paste(save_name_RNA_Liver, "WGCNA-ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)

      
      
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",         
"Salinity",    
"SecchiDepth"
)) ~ "Abiotics", 
(traits %in% c(
"FultonK", 
"Length",
"Weight",
"StomachContent",
"HSI",              
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")
 
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
  geom_tile() +
    scale_fill_gradientn(
      colours = c("steelblue1", "white", "red"),
      limit = c(-1,1), 
      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
  scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
  facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
  theme(axis.text.x = element_text(angle=90)) +
  labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
  geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2), 
                      size=2.5, colour="grey20") +
  geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2), 
                      colour="grey20", size=2.5) +
   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
      plot(A)

ggsave(A, filename = paste(paste(save_name_RNA_Liver, "WGCNA-ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 13, height = 7) 
      

##################
#Specific Modules#
##################
 #  Module <- "steelblue"
 #  HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
 #  geom_tile() +
 #    scale_fill_gradientn(
 #      colours = c("steelblue1", "white", "red"),
 #      limit = c(-1,1),
 #      values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
 #  # scale_fill_gradient2(
 #  #   low = "steelblue1",
 #  #   high = "red",
 #  #   mid = "white",
 #  #   midpoint = 0,
 #  #   limit = c(-1,1))
 #  #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
 #  theme(axis.text.x = element_text(angle=90)) +
 #  labs(title = "", y = "Modules", fill="corr") +
 #  geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2),
 #                      size=3, colour="grey20") +
 #  geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2),
 #                      colour="grey20", size=3) +
 #   theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
 #      theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
 #      prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
 # A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
 #      plot(A)
 #      ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots,
 #             device='png', dpi=300, width = 2.8,height = 7)

4.6 Dataframe Module-Genes

RNA_Liver_WGCNA<- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))

InterestingComparison <- RNA_Liver_WGCNA$RNA_Liver_MEs
Module_Genes_list <- list()
for (MODULE in names(InterestingComparison)) {
  a <- length(Module_Genes_list)
  ExpressionSet <- RNA_Liver_WGCNA$RNA_Liver_omics_data
  moduleLabels  <- RNA_Liver_WGCNA$RNA_Liver_moduleLabels
  Annotations   <- SLUCGeneManual
  
  genes_of_interest <- as.data.frame(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))]))
  print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))
  colnames(genes_of_interest) <- "rowname"
  print(paste("Module genes before Conversion to EntrezIDs"))
  print(paste(MODULE, dim(genes_of_interest)[1]))
  
  ########################################
  #Add annotated Gene-Names to omics data#
  ########################################
  #print(paste("Lost by double dot to dot conversion somwhere in the pipeline"))
  #print(genes_of_interest$rowname[genes_of_interest$rowname %in% SLUCGeneManual$rowname == FALSE])
  
  AnnotationDbi_genes  <-dplyr::left_join(genes_of_interest, SLUCGeneManual)
  AnnotationDbi_genes$GeneSymbolHS <- AnnotationDbi_genes$Human_SYMBOL_Manual
  AnnotationDbi_genes2  <- as.data.frame(AnnotationDbi_genes$Human_SYMBOL_Manual)
  colnames(AnnotationDbi_genes2) <- "GeneSymbolHS"
  library(org.Hs.eg.db)
  HS<-AnnotationDbi::select(org.Hs.eg.db, AnnotationDbi_genes2$GeneSymbolHS, "ENTREZID","SYMBOL") #Get EntrezIDs from Symbol
  #CHECK!#
  subset(HS, is.na(HS$ENTREZID))
  dim(subset(HS, is.na(HS$ENTREZID)))
  AnnotationDbi_genes2 <- dplyr::left_join(AnnotationDbi_genes2, HS, by=c("GeneSymbolHS" = "SYMBOL"))
  dedup_ids = AnnotationDbi_genes2[!duplicated(AnnotationDbi_genes2[c("ENTREZID")]),]
  dedup_ids <- dedup_ids[!is.na(dedup_ids$ENTREZID),]
  print(paste("Module genes After Conversion to EntrezIDs"))
  print(paste(MODULE, dim(dedup_ids)[1]))
  #print(head(dedup_ids))
  A<- dplyr::left_join(AnnotationDbi_genes[c("Geneid", "GeneSymbolHS")], dedup_ids)
  A <- A[complete.cases(A$Geneid), ]
  
  Module_Genes_list[[a+1]] <- A
  names(Module_Genes_list)[[a+1]] <- paste("GeneAnno",MODULE, sep="_")
}
## [1] "si.ch211.235e9.8" "LOC116042180"     "surf2"            "ass1"            
## [5] "exosc2"           "zmat5"           
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME9 519"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME9 462"
## [1] "nocta"        "LOC116042192" "enpp6"        "csnk1a1"      "fbxo38"      
## [6] "matr3l1.2"   
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME1 3334"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME1 2690"
## [1] "LOC116035151" "rab11fip1b"   "LOC116035135" "rps20"        "bco1"        
## [6] "rpl7"        
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME6 716"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME6 604"
## [1] "glis3"           "si.dkey.106l3.7" "mtfr1"           "hhatlb"         
## [5] "LOC116052657"    "zic3"           
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME23 131"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME23 105"
## [1] "zmynd19"      "kcnk4a"       "LOC116052600" "armh1"        "nipbla"      
## [6] "rab11fip1a"  
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME31 88"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME31 78"
## [1] "LOC116051164" "maml1"        "mgat4b"       "nfkb1"        "rab14l"      
## [6] "myo1hb"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME7 696"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME7 614"
## [1] "LOC116042168" "LOC116042176" "LOC116035015" "LOC116052262" "grxcr1a"     
## [6] "LOC116052444"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME16 207"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME16 162"
## [1] "osbp2"           "LOC116038048"    "nsdhl"           "si.ch73.43g23.1"
## [5] "LOC116036481"    "LOC116036640"   
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME28 105"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME28 71"
## [1] "LOC116042212" "LOC116051161" "ddx54"        "nup155"       "orai1b"      
## [6] "drg1"        
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME5 758"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME5 672"
## [1] "LOC116055762" "LOC116052266" "fgf13a"       "arhgef37"     "slc7a2"      
## [6] "LOC116063251"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME32 72"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME32 52"
## [1] "arap3"        "b4galt1l"     "slc44a1b"     "wbp1"         "ric1"        
## [6] "LOC116034915"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME10 418"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME10 364"
## [1] "slc27a4"      "LOC116052570" "rnf20"        "efnb1"        "abhd17b"     
## [6] "ccser1"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME36 59"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME36 55"
## [1] "traf2b"    "pdlim5b"   "slc20a1b"  "nt5c2l1"   "arhgap29a" "pdk3a"    
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME18 166"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME18 146"
## [1] "fubp3"        "spata13"      "fosl1a"       "tspan17"      "LOC116062520"
## [6] "LOC116053905"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME24 129"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME24 120"
## [1] "LOC118496169" "wsb2"         "evpla"        "LOC116042958" "slc25a55a"   
## [6] "klhdc8a"     
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME38 33"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME38 28"
## [1] "LOC116054943" "arvcfa"       "zgc.56231"    "LOC116053930" "hip1rb"      
## [6] "LOC116056599"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME26 110"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME26 80"
## [1] "LOC116052304" "slu7"         "LOC118494832" "nyap2a"       "folr"        
## [6] "barx1"       
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME39 32"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME39 23"
## [1] "LOC116042200" "LOC116060251" "coro1cb"      "LOC116038006" "LOC116037943"
## [6] "cpa6"        
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME14 211"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME14 158"
## [1] "immt"         "LOC116042209" "canx"         "LOC116058051" "tmem129"     
## [6] "rnf130"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME2 2571"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME2 2250"
## [1] "ndufc1"       "polr2b"       "LOC116035023" "LOC118493028" "LOC116035002"
## [6] "plgrkt"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME8 553"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME8 501"
## [1] "vipr1b"       "LOC116038046" "spata4"       "LOC116052506" "LOC116052667"
## [6] "nme5"        
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME20 142"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME20 93"
## [1] "LOC116067369" "LOC116060837" "LOC116034991" "tescb"        "taok3b"      
## [6] "rhof"        
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME4 1013"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME4 644"
## [1] "LOC116035126" "LOC116034916" "LOC118495038" "LOC116052334" "tmem265"     
## [6] "LOC116052636"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME27 106"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME27 75"
## [1] "LOC116056516"     "gdnfa"            "stard7"           "LOC116048902"    
## [5] "stau2"            "si.dkey.237h12.3"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME19 148"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME19 116"
## [1] "cplane1"  "sardh"    "fam163ba" "nktr"     "vill"     "f9a"     
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME13 230"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME13 201"
## [1] "adora2ab" "phykpl"   "qdpra"    "naaladl1" "phf10"    "lipca"   
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME34 61"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME34 52"
## [1] "neurog1"      "LOC116060826" "ssh1b"        "cabp1b"       "LOC116048878"
## [6] "armc3"       
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME17 190"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME17 137"
## [1] "pttg1"          "kmt5ab"         "cenpf"          "prc1b"         
## [5] "plk1"           "si.dkey.23f9.4"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME37 56"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME37 49"
## [1] "kcnn2"        "LOC116055747" "hs6st2"       "LOC116036535" "f2rl2"       
## [6] "ube2z"       
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME33 69"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME33 52"
## [1] "LOC116042171" "npdc1a"       "dok6"         "LOC116037979" "lingo2"      
## [6] "zgc.153018"  
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME15 210"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME15 152"
## [1] "LOC118496450" "LOC116045012" "LOC116054311" "LOC116062328" "nkpd1"       
## [6] "galr1b"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME35 59"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME35 42"
## [1] "tmem119b" "gpat4"    "snx24"    "pigm"     "mars2"    "pcdh19"  
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME12 251"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME12 200"
## [1] "LOC116035179" "smtna"        "pde6b"        "pou5f3"       "clic3"       
## [6] "tfap2a"      
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME29 93"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME29 74"
## [1] "LOC116035122"    "spag6"           "cbln2b"          "si.ch73.22a13.3"
## [5] "smtnl1"          "adad1"          
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME30 92"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME30 71"
## [1] "pcdh18b"      "LOC118496138" "tgfbi"        "atoh8"        "LOC116034939"
## [6] "LOC116035088"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME11 401"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME11 338"
## [1] "LOC116046329" "eya1"         "LOC116064437" "zgc.66433"    "LOC116053883"
## [6] "tcf7l1b"     
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME22 134"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME22 121"
## [1] "LOC116035134" "cd226"        "c1h8orf34"    "LOC116052527" "LOC118492899"
## [6] "LOC118495324"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME21 140"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME21 100"
## [1] "slc7a11"      "slitrk2"      "sapcd2"       "LOC116034992" "LOC116035152"
## [6] "zdhhc8a"     
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME3 2354"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME3 1819"
## [1] "rc3h2"        "LOC116035127" "LOC116048911" "LOC116055741" "pip4k2aa"    
## [6] "LOC116052523"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME25 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME25 104"
## [1] "LOC116051166" "LOC116051167" "LOC118494724" "LOC116042187" "LOC116042190"
## [6] "LOC116042216"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME0 11284"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME0 4949"
Gene_counts <- list()
for (i in names(Module_Genes_list)) {
  a <- length(Gene_counts)
  g <- sub("GeneAnno_ME", "", i)
  A <- length(unique(na.omit(Module_Genes_list[[i]]$ENTREZID)))
  B <- length(na.omit(Module_Genes_list[[i]]$Geneid))
  C <- table(RNA_Liver_WGCNA$RNA_Liver_moduleLabels == g)[2]
  
  module_summary <- data.frame(Module = i, Module_Size = C, EnrezID = A)
  Gene_counts[[a+1]] <- module_summary}
print(do.call(rbind, Gene_counts))
##               Module Module_Size EnrezID
## TRUE    GeneAnno_ME9         519     462
## TRUE1   GeneAnno_ME1        3334    2690
## TRUE2   GeneAnno_ME6         716     604
## TRUE3  GeneAnno_ME23         131     105
## TRUE4  GeneAnno_ME31          88      78
## TRUE5   GeneAnno_ME7         696     614
## TRUE6  GeneAnno_ME16         207     162
## TRUE7  GeneAnno_ME28         105      71
## TRUE8   GeneAnno_ME5         758     672
## TRUE9  GeneAnno_ME32          72      52
## TRUE10 GeneAnno_ME10         418     364
## TRUE11 GeneAnno_ME36          59      55
## TRUE12 GeneAnno_ME18         166     146
## TRUE13 GeneAnno_ME24         129     120
## TRUE14 GeneAnno_ME38          33      28
## TRUE15 GeneAnno_ME26         110      80
## TRUE16 GeneAnno_ME39          32      23
## TRUE17 GeneAnno_ME14         211     158
## TRUE18  GeneAnno_ME2        2571    2250
## TRUE19  GeneAnno_ME8         553     501
## TRUE20 GeneAnno_ME20         142      93
## TRUE21  GeneAnno_ME4        1013     644
## TRUE22 GeneAnno_ME27         106      75
## TRUE23 GeneAnno_ME19         148     116
## TRUE24 GeneAnno_ME13         230     201
## TRUE25 GeneAnno_ME34          61      52
## TRUE26 GeneAnno_ME17         190     137
## TRUE27 GeneAnno_ME37          56      49
## TRUE28 GeneAnno_ME33          69      52
## TRUE29 GeneAnno_ME15         210     152
## TRUE30 GeneAnno_ME35          59      42
## TRUE31 GeneAnno_ME12         251     200
## TRUE32 GeneAnno_ME29          93      74
## TRUE33 GeneAnno_ME30          92      71
## TRUE34 GeneAnno_ME11         401     338
## TRUE35 GeneAnno_ME22         134     121
## TRUE36 GeneAnno_ME21         140     100
## TRUE37  GeneAnno_ME3        2354    1819
## TRUE38 GeneAnno_ME25         125     104
## TRUE39  GeneAnno_ME0       11284    4949
RNA_Liver_WGCNA$RNA_Liver_GeneAnno <- Module_Genes_list


saveRDS(RNA_Liver_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List_3", Date, sep="_"), ".rds", sep=""))))

#-